{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Illiberal Communication and Election Intervention During the Refugee Crisis in Germany**\n",
    "\n",
    "Ashrakat Elshehawy, Konstantin Gavras, Nikolay Marinov, Federico Nanni, Harald Schoen\n",
    "\n",
    "Perspectives on Politics\n",
    "\n",
    "Dataverse link:\n",
    "\n",
    "\"Replication Data for: Illiberal Communication and Election Intervention During the Refugee Crisis in Germany\", https://doi.org/10.7910/DVN/T2FZK3, Harvard Dataverse, DRAFT VERSION, UNF:6:L4g980UvlhsPseyzqCxmKw== [fileUNF]\n",
    "\n",
    "Sentiment Analysis Python Code"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Code to create a sentiment score for each refugee-relevant media piece and party communication piece.\n",
    "Code implemented by Ashrakat Elshehawy."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: Please change the path to all files to your local path"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load needed packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/ashrakatelshehawy/opt/anaconda3/lib/python3.8/site-packages/gensim/similarities/__init__.py:15: UserWarning: The gensim.similarities.levenshtein submodule is disabled, because the optional Levenshtein package <https://pypi.org/project/python-Levenshtein/> is unavailable. Install Levenhstein (e.g. `pip install python-Levenshtein`) to suppress this warning.\n",
      "  warnings.warn(msg)\n"
     ]
    }
   ],
   "source": [
    "import codecs, nltk, string, os, gensim\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import spacy\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.tag.sequential import ClassifierBasedTagger"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NLTK building blocks used by the text pre-processing\n",
    "import nltk\n",
    "import string\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.stem import SnowballStemmer\n",
    "from nltk.stem.wordnet import WordNetLemmatizer\n",
    "\n",
    "# WordNet lemmatizer instance\n",
    "wordnet_lemmatizer = WordNetLemmatizer()\n",
    "# Snowball stemmer configured for German\n",
    "snowball_stemmer = SnowballStemmer(\"german\")\n",
    "# set of punctuation characters to exclude from token lists\n",
    "exclude = set(string.punctuation)\n",
    "# German stop-word list\n",
    "stop_word_list = stopwords.words('german')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Wordembeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Word embeddings\n",
    "# Folder that holds the embedding file. Set the SENTIMENT_DATA_DIR environment\n",
    "# variable to point at your own folder instead of editing the code; when it is\n",
    "# not set, the original author's path is used.\n",
    "data_dir = os.environ.get(\"SENTIMENT_DATA_DIR\", \"/Users/ashrakatelshehawy\")\n",
    "embed_file = os.path.join(data_dir, \"in-domain-embeddings.txt\")\n",
    "\n",
    "# binary=False: the file is read as the plain-text word2vec format\n",
    "emb_model = gensim.models.KeyedVectors.load_word2vec_format(embed_file, binary=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Text Pre-Processing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# punctuation characters that are never kept as tokens\n",
    "exclude = set(string.punctuation)\n",
    "\n",
    "\n",
    "def text_embedding(text):\n",
    "    \"\"\"Represent a text as a single averaged doc-embedding.\n",
    "\n",
    "    Used for both the query and the sentences. The text is lower-cased and\n",
    "    split with NLTK's WordPunctTokenizer; tokens that are punctuation or not\n",
    "    purely alphabetic are dropped. Every remaining token is looked up in the\n",
    "    global emb_model, and tokens that raise KeyError are skipped.\n",
    "\n",
    "    text: a string.\n",
    "    Returns a numpy array reshaped to (1, -1) holding the per-dimension mean\n",
    "    of the word vectors found, or the string \"Empty\" if no token had a vector.\n",
    "    \"\"\"\n",
    "    tokens = nltk.tokenize.WordPunctTokenizer().tokenize(text.lower())\n",
    "\n",
    "    vectors = []\n",
    "    for token in tokens:\n",
    "        # drop punctuation and anything containing digits or symbols\n",
    "        if token in exclude or not token.isalpha():\n",
    "            continue\n",
    "        try:\n",
    "            vectors.append(emb_model[token])\n",
    "        except KeyError:\n",
    "            # no vector for this word: leave it out of the average\n",
    "            pass\n",
    "\n",
    "    if not vectors:\n",
    "        return \"Empty\"\n",
    "\n",
    "    # average dimension by dimension over all collected word vectors\n",
    "    means = []\n",
    "    for dimension in zip(*vectors):\n",
    "        means.append(float(sum(dimension)) / len(dimension))\n",
    "    return np.array(means).reshape(1, -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Original note: you need to have pre-installed spacy in german.\n",
    "# (The code in this cell itself only calls NLTK.)\n",
    "exclude = set(string.punctuation)\n",
    "stop_word_list = stopwords.words('german')\n",
    "\n",
    "\n",
    "def nlp_pipeline(text):\n",
    "    \"\"\"Tokenise a string and return its cleaned, lower-cased tokens.\n",
    "\n",
    "    Steps: NLTK word tokenisation, lower-casing, removal of punctuation and of\n",
    "    every token that is not purely alphabetic, removal of German stop words.\n",
    "\n",
    "    text: a string.\n",
    "    Returns a list of token strings.\n",
    "    \"\"\"\n",
    "    lowered = (word.lower() for word in nltk.word_tokenize(text))\n",
    "    return [\n",
    "        token\n",
    "        for token in lowered\n",
    "        if token not in exclude\n",
    "        and token.isalpha()\n",
    "        and token not in stop_word_list\n",
    "    ]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prepare sentiment analysis\n",
    "## load and prepare dictionaries of positive and negative words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Folder that holds the SentiWS dictionaries. Set the SENTIMENT_DATA_DIR\n",
    "# environment variable to point at your own folder instead of editing the code;\n",
    "# when it is not set, the original author's path is used.\n",
    "data_dir = os.environ.get(\"SENTIMENT_DATA_DIR\", \"/Users/ashrakatelshehawy\")\n",
    "\n",
    "\n",
    "def _load_sentiws(path):\n",
    "    \"\"\"Read one tab-separated SentiWS file.\n",
    "\n",
    "    path: location of the dictionary file.\n",
    "    Returns (raw, words): raw is the full table with the columns main, value\n",
    "    and words; words is a one-column frame (main) with the part of each main\n",
    "    entry that precedes the first | character.\n",
    "    \"\"\"\n",
    "    raw = pd.read_csv(path, encoding='utf-8', delimiter='\\t', header=None)\n",
    "    raw.columns = ['main', 'value', \"words\"]\n",
    "\n",
    "    # split every entry at | so that the word gets a column of its own\n",
    "    words = raw.main.apply(lambda x: pd.Series(str(x).split(\"|\", 2)))\n",
    "    words.columns = ['main', 'notneeded']\n",
    "\n",
    "    # the part after | (presumably the part-of-speech tag) is not used\n",
    "    del words['notneeded']\n",
    "    return raw, words\n",
    "\n",
    "\n",
    "# load the negative and positive dictionaries\n",
    "negative, negative1 = _load_sentiws(os.path.join(data_dir, \"SentiWS_v1.8c_Negative.txt\"))\n",
    "positive, positive1 = _load_sentiws(os.path.join(data_dir, \"SentiWS_v1.8c_Positive.txt\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>main</th>\n",
       "      <th>nlpprocessed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Abbau</td>\n",
       "      <td>[abbau]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Abbruch</td>\n",
       "      <td>[abbruch]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Abdankung</td>\n",
       "      <td>[abdankung]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Abdämpfung</td>\n",
       "      <td>[abdämpfung]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Abfall</td>\n",
       "      <td>[abfall]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1813</th>\n",
       "      <td>übersehen</td>\n",
       "      <td>[übersehen]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1814</th>\n",
       "      <td>übertreiben</td>\n",
       "      <td>[übertreiben]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1815</th>\n",
       "      <td>übertreten</td>\n",
       "      <td>[übertreten]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1816</th>\n",
       "      <td>übertrieben</td>\n",
       "      <td>[übertrieben]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1817</th>\n",
       "      <td>überwältigen</td>\n",
       "      <td>[überwältigen]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1818 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              main    nlpprocessed\n",
       "0            Abbau         [abbau]\n",
       "1          Abbruch       [abbruch]\n",
       "2        Abdankung     [abdankung]\n",
       "3       Abdämpfung    [abdämpfung]\n",
       "4           Abfall        [abfall]\n",
       "...            ...             ...\n",
       "1813     übersehen     [übersehen]\n",
       "1814   übertreiben   [übertreiben]\n",
       "1815    übertreten    [übertreten]\n",
       "1816   übertrieben   [übertrieben]\n",
       "1817  überwältigen  [überwältigen]\n",
       "\n",
       "[1818 rows x 2 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "#text preprocessing\n",
    "positive1[\"nlpprocessed\"]=positive1['main'].apply(nlp_pipeline)\n",
    "negative1[\"nlpprocessed\"]=negative1['main'].apply(nlp_pipeline)\n",
    "negative1\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn each token list into a plain string: str(['abbau']) yields the text\n",
    "# ['abbau'], and the [2:-2] slice strips the leading [' and the trailing '].\n",
    "positive1['nlpprocessed'] = positive1['nlpprocessed'].apply(str).str[2:-2]\n",
    "negative1['nlpprocessed'] = negative1['nlpprocessed'].apply(str).str[2:-2]\n",
    "\n",
    "# Manually set the first positive entry. This uses .loc instead of chained\n",
    "# indexing (positive1['nlpprocessed'][0] = ...), which raises\n",
    "# SettingWithCopyWarning and does not update the frame under pandas\n",
    "# Copy-on-Write.\n",
    "# NOTE(review): presumably row 0 comes out empty after pre-processing (e.g. a\n",
    "# byte-order mark at the start of the file) -- confirm.\n",
    "positive1.loc[0, 'nlpprocessed'] = \"abmachung\"\n",
    "\n",
    "# drop the original main column from both frames\n",
    "del positive1['main']\n",
    "del negative1['main']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[['abbau'],\n",
       " ['abbruch'],\n",
       " ['abdankung'],\n",
       " ['abdämpfung'],\n",
       " ['abfall'],\n",
       " ['abfuhr'],\n",
       " ['abgrund'],\n",
       " ['abhängigkeit'],\n",
       " ['ablehnung'],\n",
       " ['ablenkung'],\n",
       " ['abnahme'],\n",
       " ['abneigung'],\n",
       " ['abnutzung'],\n",
       " ['abriss'],\n",
       " ['abrutsch'],\n",
       " ['abschaffung'],\n",
       " ['abschreckung'],\n",
       " ['abschreibung'],\n",
       " ['abschuß'],\n",
       " ['abschwächung'],\n",
       " ['absenkung'],\n",
       " ['abspaltung'],\n",
       " ['absperrung'],\n",
       " ['abstieg'],\n",
       " ['abstoß'],\n",
       " ['abstrich'],\n",
       " ['abstumpfung'],\n",
       " ['absturz'],\n",
       " ['absurdität'],\n",
       " ['abweichung'],\n",
       " ['abweisung'],\n",
       " ['abwertung'],\n",
       " ['abwärtstrend'],\n",
       " ['abzocke'],\n",
       " ['achtlosigkeit'],\n",
       " ['affäre'],\n",
       " ['aggression'],\n",
       " ['aggressivität'],\n",
       " ['aggressor'],\n",
       " ['agitation'],\n",
       " ['alarm'],\n",
       " ['alptraum'],\n",
       " ['amateur'],\n",
       " ['ambivalenz'],\n",
       " ['androhung'],\n",
       " ['anfälligkeit'],\n",
       " ['angreifer'],\n",
       " ['angriff'],\n",
       " ['angst'],\n",
       " ['anklage'],\n",
       " ['anmaßung'],\n",
       " ['annullierung'],\n",
       " ['anomalie'],\n",
       " ['anspannung'],\n",
       " ['anstrengung'],\n",
       " ['antipathie'],\n",
       " ['arbeitslose'],\n",
       " ['arbeitslosigkeit'],\n",
       " ['armut'],\n",
       " ['arroganz'],\n",
       " ['arschloch'],\n",
       " ['attacke'],\n",
       " ['aufhebung'],\n",
       " ['auflösung'],\n",
       " ['aufregung'],\n",
       " ['aufruhr'],\n",
       " ['aufschrei'],\n",
       " ['aufstand'],\n",
       " ['ausbeute'],\n",
       " ['ausbeuter'],\n",
       " ['ausbeutung'],\n",
       " ['ausbruch'],\n",
       " ['auseinandersetzung'],\n",
       " ['ausfall'],\n",
       " ['ausgrenzung'],\n",
       " ['auslöschung'],\n",
       " ['ausrottung'],\n",
       " ['ausschließung'],\n",
       " ['aussetzung'],\n",
       " ['banalität'],\n",
       " ['bankrott'],\n",
       " ['barriere'],\n",
       " ['bedeutungslosigkeit'],\n",
       " ['bedrohung'],\n",
       " ['bedrängung'],\n",
       " ['bedrückung'],\n",
       " ['bedürftige'],\n",
       " ['beeinträchtigung'],\n",
       " ['befall'],\n",
       " ['befangenheit'],\n",
       " ['befürchtung'],\n",
       " ['begrenzung'],\n",
       " ['behinderung'],\n",
       " ['belastung'],\n",
       " ['beleidigung'],\n",
       " ['belästigung'],\n",
       " ['beschneidung'],\n",
       " ['beschränkung'],\n",
       " ['beschuldigung'],\n",
       " ['beschwerde'],\n",
       " ['beschwerlichkeit'],\n",
       " ['beschädigung'],\n",
       " ['beschäftigungslose'],\n",
       " ['beschäftigungsloser'],\n",
       " ['beschämung'],\n",
       " ['beseitigung'],\n",
       " ['besorgnis'],\n",
       " ['bestechung'],\n",
       " ['besteuerung'],\n",
       " ['bestrafung'],\n",
       " ['bestürzung'],\n",
       " ['betrug'],\n",
       " ['betrüger'],\n",
       " ['beule'],\n",
       " ['beunruhigung'],\n",
       " ['bevormundung'],\n",
       " ['bitterkeit'],\n",
       " ['blindheit'],\n",
       " ['blockade'],\n",
       " ['blockierung'],\n",
       " ['blödheit'],\n",
       " ['bombardement'],\n",
       " ['bombardierung'],\n",
       " ['bombe'],\n",
       " ['bosheit'],\n",
       " ['brand'],\n",
       " ['bruch'],\n",
       " ['brutalität'],\n",
       " ['bußgeld'],\n",
       " ['bösartigkeit'],\n",
       " ['bösewicht'],\n",
       " ['bürde'],\n",
       " ['chaos'],\n",
       " ['crash'],\n",
       " ['defekt'],\n",
       " ['defizit'],\n",
       " ['deformation'],\n",
       " ['degradierung'],\n",
       " ['dekadenz'],\n",
       " ['demütigung'],\n",
       " ['denkfehler'],\n",
       " ['depression'],\n",
       " ['desaster'],\n",
       " ['desinteresse'],\n",
       " ['dezimierung'],\n",
       " ['dieb'],\n",
       " ['diebstahl'],\n",
       " ['diffamierung'],\n",
       " ['diktator'],\n",
       " ['dilemma'],\n",
       " ['diskriminierung'],\n",
       " ['dissens'],\n",
       " ['dominierung'],\n",
       " ['doppeldeutigkeit'],\n",
       " ['doppelspiel'],\n",
       " ['dreck'],\n",
       " ['drift'],\n",
       " ['drohung'],\n",
       " ['drosselung'],\n",
       " ['dummheit'],\n",
       " ['dummkopf'],\n",
       " ['dunkelheit'],\n",
       " ['durcheinander'],\n",
       " ['dussel'],\n",
       " ['dämpfer'],\n",
       " ['dürre'],\n",
       " ['düsternis'],\n",
       " ['egoist'],\n",
       " ['ehebruch'],\n",
       " ['einbrecher'],\n",
       " ['einbruch'],\n",
       " ['einbuße'],\n",
       " ['einsamkeit'],\n",
       " ['einschlag'],\n",
       " ['einschränkung'],\n",
       " ['einschüchterung'],\n",
       " ['einsturz'],\n",
       " ['ekel'],\n",
       " ['elend'],\n",
       " ['empörung'],\n",
       " ['ende'],\n",
       " ['energielosigkeit'],\n",
       " ['entbindung'],\n",
       " ['entfremdung'],\n",
       " ['entführung'],\n",
       " ['enthauptung'],\n",
       " ['entlassung'],\n",
       " ['entmutigung'],\n",
       " ['enttäuschung'],\n",
       " ['entziehung'],\n",
       " ['epidemie'],\n",
       " ['ermahnung'],\n",
       " ['ermordung'],\n",
       " ['ermüdung'],\n",
       " ['erniedrigung'],\n",
       " ['ernüchterung'],\n",
       " ['erosion'],\n",
       " ['erpressung'],\n",
       " ['erschöpfung'],\n",
       " ['erschütterung'],\n",
       " ['explosion'],\n",
       " ['fahrlässigkeit'],\n",
       " ['farce'],\n",
       " ['faschist'],\n",
       " ['faulheit'],\n",
       " ['fehde'],\n",
       " ['fehlanzeige'],\n",
       " ['fehler'],\n",
       " ['fehlermeldung'],\n",
       " ['fehlkauf'],\n",
       " ['fehlkonstruktion'],\n",
       " ['fehlleistung'],\n",
       " ['fehltritt'],\n",
       " ['fehlverhalten'],\n",
       " ['feind'],\n",
       " ['feuer'],\n",
       " ['fiasko'],\n",
       " ['finanzkrise'],\n",
       " ['finte'],\n",
       " ['flaute'],\n",
       " ['fluch'],\n",
       " ['flucht'],\n",
       " ['flüchtiger'],\n",
       " ['flüchtigkeit'],\n",
       " ['flüchtling'],\n",
       " ['frechheit'],\n",
       " ['frust'],\n",
       " ['frustration'],\n",
       " ['furcht'],\n",
       " ['fälschung'],\n",
       " ['gammler'],\n",
       " ['gangster'],\n",
       " ['garnichts'],\n",
       " ['gauner'],\n",
       " ['gebrechen'],\n",
       " ['gedränge'],\n",
       " ['gefahr'],\n",
       " ['gefecht'],\n",
       " ['gefährdung'],\n",
       " ['gegner'],\n",
       " ['geisteskrankheit'],\n",
       " ['geistlosigkeit'],\n",
       " ['geiz'],\n",
       " ['geizhals'],\n",
       " ['geizkragen'],\n",
       " ['geldstrafe'],\n",
       " ['geschmacklosigkeit'],\n",
       " ['geschäftsauflösung'],\n",
       " ['gestank'],\n",
       " ['gewalt'],\n",
       " ['gewalttätige'],\n",
       " ['gewalttätiger'],\n",
       " ['gift'],\n",
       " ['glanzlosigkeit'],\n",
       " ['gleichgültigkeit'],\n",
       " ['glücklosigkeit'],\n",
       " ['gnadenlosigkeit'],\n",
       " ['grausamkeit'],\n",
       " ['groll'],\n",
       " ['groteske'],\n",
       " ['habgier'],\n",
       " ['haft'],\n",
       " ['handgemenge'],\n",
       " ['handicap'],\n",
       " ['hass'],\n",
       " ['heimsuchung'],\n",
       " ['hektik'],\n",
       " ['hemmung'],\n",
       " ['herausforderung'],\n",
       " ['heuchelei'],\n",
       " ['heuchler'],\n",
       " ['hilflosigkeit'],\n",
       " ['hindernis'],\n",
       " ['hinterhalt'],\n",
       " ['hinterlist'],\n",
       " ['hohn'],\n",
       " ['horror'],\n",
       " ['hunger'],\n",
       " ['hungersnot'],\n",
       " ['hungertod'],\n",
       " ['hysterie'],\n",
       " ['härte'],\n",
       " ['hölle'],\n",
       " ['idiot'],\n",
       " ['illegalität'],\n",
       " ['immobilität'],\n",
       " ['ineffizienz'],\n",
       " ['infektion'],\n",
       " ['infiltration'],\n",
       " ['inflation'],\n",
       " ['inkompetenz'],\n",
       " ['inkonsequenz'],\n",
       " ['inkonsistenz'],\n",
       " ['instabilität'],\n",
       " ['intervention'],\n",
       " ['invasion'],\n",
       " ['isolation'],\n",
       " ['jammer'],\n",
       " ['jähzorn'],\n",
       " ['kampf'],\n",
       " ['kapitalverbrechen'],\n",
       " ['kapitulation'],\n",
       " ['katastrophe'],\n",
       " ['keim'],\n",
       " ['klage'],\n",
       " ['klischee'],\n",
       " ['kläger'],\n",
       " ['knappheit'],\n",
       " ['kollaps'],\n",
       " ['kollision'],\n",
       " ['komplikation'],\n",
       " ['konflikt'],\n",
       " ['konfrontation'],\n",
       " ['konjunkturrückgang'],\n",
       " ['konkurrenz'],\n",
       " ['konkurrenzkampf'],\n",
       " ['konkurs'],\n",
       " ['kontroverse'],\n",
       " ['kopfschmerzen'],\n",
       " ['korruption'],\n",
       " ['kostspieligkeit'],\n",
       " ['krach'],\n",
       " ['krankheit'],\n",
       " ['krieg'],\n",
       " ['kriminalität'],\n",
       " ['krise'],\n",
       " ['kritik'],\n",
       " ['kritiker'],\n",
       " ['kränkung'],\n",
       " ['krüppel'],\n",
       " ['kurseinbruch'],\n",
       " ['kälte'],\n",
       " ['kündigung'],\n",
       " ['kürzung'],\n",
       " ['langeweile'],\n",
       " ['langweiler'],\n",
       " ['last'],\n",
       " ['launenhaftigkeit'],\n",
       " ['lebensgefahr'],\n",
       " ['leblosigkeit'],\n",
       " ['leere'],\n",
       " ['leichtsinn'],\n",
       " ['leichtsinnsfehler'],\n",
       " ['leid'],\n",
       " ['leidende'],\n",
       " ['liquidation'],\n",
       " ['lähmung'],\n",
       " ['löschung'],\n",
       " ['lüge'],\n",
       " ['lügner'],\n",
       " ['makel'],\n",
       " ['mangel'],\n",
       " ['manipulation'],\n",
       " ['massaker'],\n",
       " ['maßlosigkeit'],\n",
       " ['melancholie'],\n",
       " ['melodrama'],\n",
       " ['merkwürdigkeit'],\n",
       " ['minderung'],\n",
       " ['minderwertigkeit'],\n",
       " ['missachtung'],\n",
       " ['missbrauch'],\n",
       " ['missgeschick'],\n",
       " ['missglück'],\n",
       " ['misstrauen'],\n",
       " ['misstrauensantrag'],\n",
       " ['missverständnis'],\n",
       " ['mist'],\n",
       " ['mittellosigkeit'],\n",
       " ['mittelmäßigkeit'],\n",
       " ['monotonie'],\n",
       " ['mord'],\n",
       " ['mutlosigkeit'],\n",
       " ['müdigkeit'],\n",
       " ['mühe'],\n",
       " ['müll'],\n",
       " ['nachlässigkeit'],\n",
       " ['nachteil'],\n",
       " ['naivität'],\n",
       " ['narr'],\n",
       " ['negativität'],\n",
       " ['neid'],\n",
       " ['nervosität'],\n",
       " ['neustart'],\n",
       " ['niedergang'],\n",
       " ['niedergeschlagenheit'],\n",
       " ['niederlage'],\n",
       " ['not'],\n",
       " ['notfall'],\n",
       " ['notstand'],\n",
       " ['nutzlosigkeit'],\n",
       " ['nötigung'],\n",
       " ['oberflächlichkeit'],\n",
       " ['offensive'],\n",
       " ['opposition'],\n",
       " ['panik'],\n",
       " ['panne'],\n",
       " ['pech'],\n",
       " ['pessimismus'],\n",
       " ['pest'],\n",
       " ['pflicht'],\n",
       " ['pleite'],\n",
       " ['preissturz'],\n",
       " ['problem'],\n",
       " ['propaganda'],\n",
       " ['protest'],\n",
       " ['provisorium'],\n",
       " ['provokation'],\n",
       " ['qual'],\n",
       " ['qualitätsminderung'],\n",
       " ['rache'],\n",
       " ['ratlosigkeit'],\n",
       " ['raub'],\n",
       " ['rebellen'],\n",
       " ['rebellion'],\n",
       " ['rechtswidrigkeit'],\n",
       " ['redundanz'],\n",
       " ['reinfall'],\n",
       " ['reklamation'],\n",
       " ['reparatur'],\n",
       " ['revolte'],\n",
       " ['revolution'],\n",
       " ['rezession'],\n",
       " ['risiko'],\n",
       " ['rivale'],\n",
       " ['rivalität'],\n",
       " ['rost'],\n",
       " ['ruin'],\n",
       " ['rutsch'],\n",
       " ['rätselraten'],\n",
       " ['räuber'],\n",
       " ['rückfall'],\n",
       " ['rückgang'],\n",
       " ['rückschritt'],\n",
       " ['rücksendung'],\n",
       " ['rücksichtslosigkeit'],\n",
       " ['rückstand'],\n",
       " ['rückständigkeit'],\n",
       " ['rücktritt'],\n",
       " ['rückzug'],\n",
       " ['sabotage'],\n",
       " ['sackgasse'],\n",
       " ['schaden'],\n",
       " ['schadensbild'],\n",
       " ['scham'],\n",
       " ['schande'],\n",
       " ['scheidung'],\n",
       " ['scheitern'],\n",
       " ['schelte'],\n",
       " ['scheußlichkeit'],\n",
       " ['schlachtfeld'],\n",
       " ['schlag'],\n",
       " ['schlamperei'],\n",
       " ['schlechtigkeit'],\n",
       " ['schlitterbahn'],\n",
       " ['schlägerei'],\n",
       " ['schmerz'],\n",
       " ['schmuggel'],\n",
       " ['schmutz'],\n",
       " ['schock'],\n",
       " ['schramme'],\n",
       " ['schreck'],\n",
       " ['schrott'],\n",
       " ['schräglauf'],\n",
       " ['schubs'],\n",
       " ['schuld'],\n",
       " ['schuldner'],\n",
       " ['schuldnerin'],\n",
       " ['schurke'],\n",
       " ['schwierigkeit'],\n",
       " ['schwund'],\n",
       " ['schwäche'],\n",
       " ['schwächung'],\n",
       " ['schäden'],\n",
       " ['schädigung'],\n",
       " ['senkung'],\n",
       " ['sinnlosigkeit'],\n",
       " ['sintflut'],\n",
       " ['skandal'],\n",
       " ['sklave'],\n",
       " ['sklavenarbeit'],\n",
       " ['sorge'],\n",
       " ['spott'],\n",
       " ['sprengstoff'],\n",
       " ['sprengung'],\n",
       " ['stagnation'],\n",
       " ['stau'],\n",
       " ['sterben'],\n",
       " ['steuerhinterziehung'],\n",
       " ['stilllegung'],\n",
       " ['stillstand'],\n",
       " ['stornierung'],\n",
       " ['stoß'],\n",
       " ['strafverfahren'],\n",
       " ['strapaze'],\n",
       " ['streik'],\n",
       " ['streit'],\n",
       " ['strenge'],\n",
       " ['stress'],\n",
       " ['streß'],\n",
       " ['sturheit'],\n",
       " ['sturz'],\n",
       " ['störung'],\n",
       " ['sucht'],\n",
       " ['sündenbock'],\n",
       " ['tabu'],\n",
       " ['terror'],\n",
       " ['terrorismus'],\n",
       " ['teuerung'],\n",
       " ['teuerungsrate'],\n",
       " ['teufelskreis'],\n",
       " ['tod'],\n",
       " ['todesfall'],\n",
       " ['todesstrafe'],\n",
       " ['torheit'],\n",
       " ['totschlag'],\n",
       " ['tragödie'],\n",
       " ['trauer'],\n",
       " ['trauma'],\n",
       " ['traurigkeit'],\n",
       " ['trennung'],\n",
       " ['tristesse'],\n",
       " ['trostlosigkeit'],\n",
       " ['trott'],\n",
       " ['trugschluß'],\n",
       " ['trägheit'],\n",
       " ['träne'],\n",
       " ['trübsal'],\n",
       " ['turbolenz'],\n",
       " ['turbolenzen'],\n",
       " ['tyrannei'],\n",
       " ['täuschung'],\n",
       " ['umtausch'],\n",
       " ['unbehagen'],\n",
       " ['unbehaglichkeit'],\n",
       " ['unbeliebtheit'],\n",
       " ['unbequemlichkeit'],\n",
       " ['unbestimmtheit'],\n",
       " ['undankbarkeit'],\n",
       " ['unehrlichkeit'],\n",
       " ['uneinigkeit'],\n",
       " ['unerbittlichkeit'],\n",
       " ['unerträglichkeit'],\n",
       " ['unfall'],\n",
       " ['unfreundlichkeit'],\n",
       " ['unfug'],\n",
       " ['unfähigkeit'],\n",
       " ['ungeduld'],\n",
       " ['ungehorsamkeit'],\n",
       " ['ungenauigkeit'],\n",
       " ['ungerechtigkeit'],\n",
       " ['ungeschicklichkeit'],\n",
       " ['unglaubwürdigkeit'],\n",
       " ['ungleichheit'],\n",
       " ['unglück'],\n",
       " ['unheilbarkeit'],\n",
       " ['unhöflichkeit'],\n",
       " ['unklarheit'],\n",
       " ['unmenschlichkeit'],\n",
       " ['unmoral'],\n",
       " ['unmut'],\n",
       " ['unordnung'],\n",
       " ['unrecht'],\n",
       " ['unregelmäßigkeit'],\n",
       " ['unrentabilität'],\n",
       " ['unruhe'],\n",
       " ['unsicherheit'],\n",
       " ['unsinn'],\n",
       " ['unstetigkeit'],\n",
       " ['unstimmigkeit'],\n",
       " ['unterbrechung'],\n",
       " ['unterdrückung'],\n",
       " ['untergang'],\n",
       " ['unterlassung'],\n",
       " ['unterwerfung'],\n",
       " ['untreue'],\n",
       " ['unverantwortlichkeit'],\n",
       " ['unvereinbarkeit'],\n",
       " ['unverhältnismäßigkeit'],\n",
       " ['unverschämtheit'],\n",
       " ['unvollkommenheit'],\n",
       " ['unvollständigkeit'],\n",
       " ['unwahrheit'],\n",
       " ['unwirksamkeit'],\n",
       " ['unwirtschaftlichkeit'],\n",
       " ['unwissenheit'],\n",
       " ['unzufriedenheit'],\n",
       " ['unzumutbarkeit'],\n",
       " ['unzuverlässigkeit'],\n",
       " ['vagheit'],\n",
       " ['verachtung'],\n",
       " ['verbannung'],\n",
       " ['verbot'],\n",
       " ['verdacht'],\n",
       " ['verdorbenheit'],\n",
       " ['verdrängung'],\n",
       " ['verdächtige'],\n",
       " ['verfall'],\n",
       " ['verfehlung'],\n",
       " ['vergeltung'],\n",
       " ['vergeltungsmaßnahme'],\n",
       " ['vergeudung'],\n",
       " ['verherrlichung'],\n",
       " ['verhängnis'],\n",
       " ['verkleinerung'],\n",
       " ['verlangsamung'],\n",
       " ['verletzung'],\n",
       " ['verleumdung'],\n",
       " ['verlierer'],\n",
       " ['verlust'],\n",
       " ['vermeidung'],\n",
       " ['verminderung'],\n",
       " ['vernachlässigung'],\n",
       " ['vernichtung'],\n",
       " ['verrat'],\n",
       " ['verräter'],\n",
       " ['verrückter'],\n",
       " ['verrücktheit'],\n",
       " ['versagen'],\n",
       " ['verschlechterung'],\n",
       " ['verschmutzung'],\n",
       " ['verschwendung'],\n",
       " ['verschwörung'],\n",
       " ['versenkung'],\n",
       " ['versklavung'],\n",
       " ['verstoß'],\n",
       " ['verstrickung'],\n",
       " ['versuchung'],\n",
       " ['versäumnis'],\n",
       " ['vertreibung'],\n",
       " ['verurteilung'],\n",
       " ['verweigerung'],\n",
       " ['verwerfung'],\n",
       " ['verwirrung'],\n",
       " ['verwundung'],\n",
       " ['verwüstung'],\n",
       " ['verzerrung'],\n",
       " ['verzicht'],\n",
       " ['verzweiflung'],\n",
       " ['verzögerung'],\n",
       " ['vorurteil'],\n",
       " ['vorwand'],\n",
       " ['vorwurf'],\n",
       " ['wahnsinn'],\n",
       " ['wermutstropfen'],\n",
       " ['wertlosigkeit'],\n",
       " ['wertverlust'],\n",
       " ['wichtigtuer'],\n",
       " ['widernatürlichkeit'],\n",
       " ['widerruf'],\n",
       " ['widerspruch'],\n",
       " ['widrigkeit'],\n",
       " ['wirtschaftskrise'],\n",
       " ['wrack'],\n",
       " ['wunde'],\n",
       " ['wut'],\n",
       " ['wüste'],\n",
       " ['zahlungsunfähigkeit'],\n",
       " ['zeitverschwendung'],\n",
       " ['zensur'],\n",
       " ['zerrung'],\n",
       " ['zerschlagung'],\n",
       " ['zerstörung'],\n",
       " ['ziellosigkeit'],\n",
       " ['zoll'],\n",
       " ['zorn'],\n",
       " ['zumutung'],\n",
       " ['zusammenbruch'],\n",
       " ['zusammenstoß'],\n",
       " ['zwang'],\n",
       " ['zwangslage'],\n",
       " ['zwangsmaßnahmen'],\n",
       " ['zweifel'],\n",
       " ['zwietracht'],\n",
       " ['abbauen'],\n",
       " ['abbrechen'],\n",
       " ['abdanken'],\n",
       " ['abdämpfen'],\n",
       " ['abfallen'],\n",
       " ['abfällig'],\n",
       " ['abführen'],\n",
       " ['abgebrochen'],\n",
       " ['abgedroschen'],\n",
       " ['abgestanden'],\n",
       " ['abgetakelt'],\n",
       " ['abgleiten'],\n",
       " ['abgründig'],\n",
       " ['abhängig'],\n",
       " ['ablaufen'],\n",
       " ['ablehnen'],\n",
       " ['ablenken'],\n",
       " ['abnehmen'],\n",
       " ['abnutzen'],\n",
       " ['abraten'],\n",
       " ['abreissen'],\n",
       " ['abrupt'],\n",
       " ['abrutschen'],\n",
       " ['abschaffen'],\n",
       " ['abschießen'],\n",
       " ['abschrecken'],\n",
       " ['abschreckend'],\n",
       " ['abschreiben'],\n",
       " ['abschwächen'],\n",
       " ['abschätzig'],\n",
       " ['absenken'],\n",
       " ['absinken'],\n",
       " ['abspalten'],\n",
       " ['absperren'],\n",
       " ['absteigen'],\n",
       " ['abstoßen'],\n",
       " ['abstoßend'],\n",
       " ['abstumpfen'],\n",
       " ['abstürzen'],\n",
       " ['absurd'],\n",
       " ['abtragen'],\n",
       " ['abweichen'],\n",
       " ['abweisen'],\n",
       " ['abwerten'],\n",
       " ['achtlos'],\n",
       " ['aggressiv'],\n",
       " ['alarmieren'],\n",
       " ['alkoholisiert'],\n",
       " ['alt'],\n",
       " ['altmodisch'],\n",
       " ['amateurhaft'],\n",
       " ['ambivalent'],\n",
       " ['androhen'],\n",
       " ['anfällig'],\n",
       " ['angespannt'],\n",
       " ['angestrengt'],\n",
       " ['angetrunken'],\n",
       " ['angreifen'],\n",
       " ['anklagen'],\n",
       " ['anmaßen'],\n",
       " ['annullieren'],\n",
       " ['anschießen'],\n",
       " ['anspannen'],\n",
       " ['anstrengen'],\n",
       " ['anstrengend'],\n",
       " ['anstößig'],\n",
       " ['antiquiert'],\n",
       " ['anzünden'],\n",
       " ['apathisch'],\n",
       " ['apokalyptisch'],\n",
       " ['arbeitslos'],\n",
       " ['archaisch'],\n",
       " ['arm'],\n",
       " ['armselig'],\n",
       " ['arrogant'],\n",
       " ['attackieren'],\n",
       " ['aufblasen'],\n",
       " ['aufblähen'],\n",
       " ['aufbringen'],\n",
       " ['auffallen'],\n",
       " ['aufgeben'],\n",
       " ['aufgebracht'],\n",
       " ['aufgeregt'],\n",
       " ['aufhören'],\n",
       " ['auflösen'],\n",
       " ['aufregen'],\n",
       " ['aufreibend'],\n",
       " ['aufrühren'],\n",
       " ['aufschlagen'],\n",
       " ['aufschreien'],\n",
       " ['aufwühlen'],\n",
       " ['ausbeuten'],\n",
       " ['ausbrechen'],\n",
       " ['auseinanderfallen'],\n",
       " ['auseinandersetzen'],\n",
       " ['ausfallen'],\n",
       " ['ausgehungert'],\n",
       " ['ausgestorben'],\n",
       " ['ausgleiten'],\n",
       " ['ausgrenzen'],\n",
       " ['ausradieren'],\n",
       " ['ausrotten'],\n",
       " ['ausschalten'],\n",
       " ['ausschließen'],\n",
       " ['aussetzen'],\n",
       " ['aussichtslos'],\n",
       " ['aussterben'],\n",
       " ['banal'],\n",
       " ['barbarisch'],\n",
       " ['beanstandet'],\n",
       " ['bedauerlich'],\n",
       " ['bedauern'],\n",
       " ['bedauernswert'],\n",
       " ['bedenklich'],\n",
       " ['bedeppert'],\n",
       " ['bedeutungslos'],\n",
       " ['bedrohen'],\n",
       " ['bedrohlich'],\n",
       " ['bedrängen'],\n",
       " ['bedrücken'],\n",
       " ['bedrückt'],\n",
       " ['bedürftig'],\n",
       " ['beeinträchtigen'],\n",
       " ['beenden'],\n",
       " ['befallen'],\n",
       " ['befangen'],\n",
       " ['befremdlich'],\n",
       " ['befürchten'],\n",
       " ['begrenzen'],\n",
       " ['begrenzt'],\n",
       " ['begriffsstutzig'],\n",
       " ['behindern'],\n",
       " ['behämmert'],\n",
       " ['beklagen'],\n",
       " ['beklagenswert'],\n",
       " ['bekloppt'],\n",
       " ['beknackt'],\n",
       " ['bekümmert'],\n",
       " ['belanglos'],\n",
       " ['belasten'],\n",
       " ['beleidigen'],\n",
       " ['beleidigend'],\n",
       " ['beleidigt'],\n",
       " ['belästigen'],\n",
       " ['berauben'],\n",
       " ['bergab'],\n",
       " ['bescheuert'],\n",
       " ['beschissen'],\n",
       " ['beschneiden'],\n",
       " ['beschruppt'],\n",
       " ['beschränken'],\n",
       " ['beschränkt'],\n",
       " ['beschuldigen'],\n",
       " ['beschweren'],\n",
       " ['beschwerlich'],\n",
       " ['beschädigen'],\n",
       " ['beschäftigungslos'],\n",
       " ['beschämen'],\n",
       " ['beseitigen'],\n",
       " ['besorgniserregend'],\n",
       " ['besorgt'],\n",
       " ['bestechen'],\n",
       " ['besteuern'],\n",
       " ['bestrafen'],\n",
       " ['bestürzt'],\n",
       " ['betrunken'],\n",
       " ['betrügen'],\n",
       " ['betrügerisch'],\n",
       " ['betäuben'],\n",
       " ['beunruhigen'],\n",
       " ['beunruhigend'],\n",
       " ['beunruhigt'],\n",
       " ['bevormunden'],\n",
       " ['bewegungslos'],\n",
       " ['beängstigend'],\n",
       " ['billig'],\n",
       " ['bitter'],\n",
       " ['bizarr'],\n",
       " ['blauäugig'],\n",
       " ['blind'],\n",
       " ['blockieren'],\n",
       " ['blutig'],\n",
       " ['blöd'],\n",
       " ['blöde'],\n",
       " ['bombardieren'],\n",
       " ['borniert'],\n",
       " ['boshaft'],\n",
       " ['brechen'],\n",
       " ['brennen'],\n",
       " ['brisant'],\n",
       " ['brutal'],\n",
       " ['brüchig'],\n",
       " ['bösartig'],\n",
       " ['böse'],\n",
       " ['chaotisch'],\n",
       " ['charakterschwach'],\n",
       " ['dahinschwinden'],\n",
       " ['debil'],\n",
       " ['defekt'],\n",
       " ['defizitär'],\n",
       " ['deformieren'],\n",
       " ['degradieren'],\n",
       " ['deinstallieren'],\n",
       " ['deinstalliert'],\n",
       " ['dekadent'],\n",
       " ['demütigen'],\n",
       " ['denkfaul'],\n",
       " ['depressiv'],\n",
       " ['desinteressiert'],\n",
       " ['desolat'],\n",
       " ['destruktiv'],\n",
       " ['dezimieren'],\n",
       " ['diffamieren'],\n",
       " ['diffizil'],\n",
       " ['diffus'],\n",
       " ['diktatorisch'],\n",
       " ['dilettantisch'],\n",
       " ['diskreditieren'],\n",
       " ['distanziert'],\n",
       " ['disziplinlos'],\n",
       " ['dominieren'],\n",
       " ['doof'],\n",
       " ['dramatisch'],\n",
       " ['drastisch'],\n",
       " ['dreckig'],\n",
       " ['dreist'],\n",
       " ['driften'],\n",
       " ['drohen'],\n",
       " ['drosseln'],\n",
       " ['drängelnd'],\n",
       " ['drängen'],\n",
       " ['dumm'],\n",
       " ['dunkel'],\n",
       " ['durchfallen'],\n",
       " ['dusselig'],\n",
       " ['dämlich'],\n",
       " ['dämpfen'],\n",
       " ['dürr'],\n",
       " ['düster'],\n",
       " ['egoistisch'],\n",
       " ['ehebrechen'],\n",
       " ['eigenartig'],\n",
       " ['einbehalten'],\n",
       " ['einbrechen'],\n",
       " ['einfältig'],\n",
       " ['eingehen'],\n",
       " ['eingeschränkt'],\n",
       " ['einsam'],\n",
       " ['einschlagen'],\n",
       " ['einschrumpfen'],\n",
       " ['einschränken'],\n",
       " ['einschüchtern'],\n",
       " ['einschüchternd'],\n",
       " ['einsinken'],\n",
       " ['einstellen'],\n",
       " ['einstürzen'],\n",
       " ['eintönig'],\n",
       " ['ekelerregend'],\n",
       " ['ekelig'],\n",
       " ['eklatant'],\n",
       " ['elend'],\n",
       " ['elendig'],\n",
       " ['empören'],\n",
       " ['energielos'],\n",
       " ['engstirnig'],\n",
       " ['entbehrungsreich'],\n",
       " ['entbinden'],\n",
       " ['entfremden'],\n",
       " ['entführen'],\n",
       " ['entgleiten'],\n",
       " ['enthaupten'],\n",
       " ['entkräftet'],\n",
       " ['entlassen'],\n",
       " ['entmutigen'],\n",
       " ['entnervt'],\n",
       " ['entrüstet'],\n",
       " ['entschwinden'],\n",
       " ['entsetzlich'],\n",
       " ['enttäuschen'],\n",
       " ['enttäuschend'],\n",
       " ['enttäuscht'],\n",
       " ['entwürdigend'],\n",
       " ['entziehen'],\n",
       " ['erbittert'],\n",
       " ['erbost'],\n",
       " ['erbrechen'],\n",
       " ['erbärmlich'],\n",
       " ['erdrückend'],\n",
       " ['ergaunern'],\n",
       " ['ergebnislos'],\n",
       " ['erleiden'],\n",
       " ['erliegen'],\n",
       " ['ermahnen'],\n",
       " ['ermorden'],\n",
       " ['ermüden'],\n",
       " ['erniedrigen'],\n",
       " ['ernüchternd'],\n",
       " ['erpressen'],\n",
       " ['erschießen'],\n",
       " ['erschlaffen'],\n",
       " ['erschlagen'],\n",
       " ['erschrecken'],\n",
       " ['erschreckend'],\n",
       " ['erschweren'],\n",
       " ['erschöpfen'],\n",
       " ['erschöpft'],\n",
       " ['erschüttern'],\n",
       " ['erschütternd'],\n",
       " ['erschüttert'],\n",
       " ['erstechen'],\n",
       " ['ersticken'],\n",
       " ['ertrinken'],\n",
       " ['erwürgen'],\n",
       " ['erzürnt'],\n",
       " ['existenzbedrohend'],\n",
       " ['explodieren'],\n",
       " ['fad'],\n",
       " ['fadenscheinig'],\n",
       " ['fahrlässig'],\n",
       " ['fallen'],\n",
       " ['falsch'],\n",
       " ['farblos'],\n",
       " ['faschistisch'],\n",
       " ['fatal'],\n",
       " ['faul'],\n",
       " ['fehlen'],\n",
       " ...]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "#convert pandas to list\n",
    "positive_list= positive1.values.tolist()\n",
    "negative_list= negative1.values.tolist()\n",
    "positive_list\n",
    "\n",
    "negative_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['abbau',\n",
       " 'abbruch',\n",
       " 'abdankung',\n",
       " 'abdämpfung',\n",
       " 'abfall',\n",
       " 'abfuhr',\n",
       " 'abgrund',\n",
       " 'abhängigkeit',\n",
       " 'ablehnung',\n",
       " 'ablenkung',\n",
       " 'abnahme',\n",
       " 'abneigung',\n",
       " 'abnutzung',\n",
       " 'abriss',\n",
       " 'abrutsch',\n",
       " 'abschaffung',\n",
       " 'abschreckung',\n",
       " 'abschreibung',\n",
       " 'abschuß',\n",
       " 'abschwächung',\n",
       " 'absenkung',\n",
       " 'abspaltung',\n",
       " 'absperrung',\n",
       " 'abstieg',\n",
       " 'abstoß',\n",
       " 'abstrich',\n",
       " 'abstumpfung',\n",
       " 'absturz',\n",
       " 'absurdität',\n",
       " 'abweichung',\n",
       " 'abweisung',\n",
       " 'abwertung',\n",
       " 'abwärtstrend',\n",
       " 'abzocke',\n",
       " 'achtlosigkeit',\n",
       " 'affäre',\n",
       " 'aggression',\n",
       " 'aggressivität',\n",
       " 'aggressor',\n",
       " 'agitation',\n",
       " 'alarm',\n",
       " 'alptraum',\n",
       " 'amateur',\n",
       " 'ambivalenz',\n",
       " 'androhung',\n",
       " 'anfälligkeit',\n",
       " 'angreifer',\n",
       " 'angriff',\n",
       " 'angst',\n",
       " 'anklage',\n",
       " 'anmaßung',\n",
       " 'annullierung',\n",
       " 'anomalie',\n",
       " 'anspannung',\n",
       " 'anstrengung',\n",
       " 'antipathie',\n",
       " 'arbeitslose',\n",
       " 'arbeitslosigkeit',\n",
       " 'armut',\n",
       " 'arroganz',\n",
       " 'arschloch',\n",
       " 'attacke',\n",
       " 'aufhebung',\n",
       " 'auflösung',\n",
       " 'aufregung',\n",
       " 'aufruhr',\n",
       " 'aufschrei',\n",
       " 'aufstand',\n",
       " 'ausbeute',\n",
       " 'ausbeuter',\n",
       " 'ausbeutung',\n",
       " 'ausbruch',\n",
       " 'auseinandersetzung',\n",
       " 'ausfall',\n",
       " 'ausgrenzung',\n",
       " 'auslöschung',\n",
       " 'ausrottung',\n",
       " 'ausschließung',\n",
       " 'aussetzung',\n",
       " 'banalität',\n",
       " 'bankrott',\n",
       " 'barriere',\n",
       " 'bedeutungslosigkeit',\n",
       " 'bedrohung',\n",
       " 'bedrängung',\n",
       " 'bedrückung',\n",
       " 'bedürftige',\n",
       " 'beeinträchtigung',\n",
       " 'befall',\n",
       " 'befangenheit',\n",
       " 'befürchtung',\n",
       " 'begrenzung',\n",
       " 'behinderung',\n",
       " 'belastung',\n",
       " 'beleidigung',\n",
       " 'belästigung',\n",
       " 'beschneidung',\n",
       " 'beschränkung',\n",
       " 'beschuldigung',\n",
       " 'beschwerde',\n",
       " 'beschwerlichkeit',\n",
       " 'beschädigung',\n",
       " 'beschäftigungslose',\n",
       " 'beschäftigungsloser',\n",
       " 'beschämung',\n",
       " 'beseitigung',\n",
       " 'besorgnis',\n",
       " 'bestechung',\n",
       " 'besteuerung',\n",
       " 'bestrafung',\n",
       " 'bestürzung',\n",
       " 'betrug',\n",
       " 'betrüger',\n",
       " 'beule',\n",
       " 'beunruhigung',\n",
       " 'bevormundung',\n",
       " 'bitterkeit',\n",
       " 'blindheit',\n",
       " 'blockade',\n",
       " 'blockierung',\n",
       " 'blödheit',\n",
       " 'bombardement',\n",
       " 'bombardierung',\n",
       " 'bombe',\n",
       " 'bosheit',\n",
       " 'brand',\n",
       " 'bruch',\n",
       " 'brutalität',\n",
       " 'bußgeld',\n",
       " 'bösartigkeit',\n",
       " 'bösewicht',\n",
       " 'bürde',\n",
       " 'chaos',\n",
       " 'crash',\n",
       " 'defekt',\n",
       " 'defizit',\n",
       " 'deformation',\n",
       " 'degradierung',\n",
       " 'dekadenz',\n",
       " 'demütigung',\n",
       " 'denkfehler',\n",
       " 'depression',\n",
       " 'desaster',\n",
       " 'desinteresse',\n",
       " 'dezimierung',\n",
       " 'dieb',\n",
       " 'diebstahl',\n",
       " 'diffamierung',\n",
       " 'diktator',\n",
       " 'dilemma',\n",
       " 'diskriminierung',\n",
       " 'dissens',\n",
       " 'dominierung',\n",
       " 'doppeldeutigkeit',\n",
       " 'doppelspiel',\n",
       " 'dreck',\n",
       " 'drift',\n",
       " 'drohung',\n",
       " 'drosselung',\n",
       " 'dummheit',\n",
       " 'dummkopf',\n",
       " 'dunkelheit',\n",
       " 'durcheinander',\n",
       " 'dussel',\n",
       " 'dämpfer',\n",
       " 'dürre',\n",
       " 'düsternis',\n",
       " 'egoist',\n",
       " 'ehebruch',\n",
       " 'einbrecher',\n",
       " 'einbruch',\n",
       " 'einbuße',\n",
       " 'einsamkeit',\n",
       " 'einschlag',\n",
       " 'einschränkung',\n",
       " 'einschüchterung',\n",
       " 'einsturz',\n",
       " 'ekel',\n",
       " 'elend',\n",
       " 'empörung',\n",
       " 'ende',\n",
       " 'energielosigkeit',\n",
       " 'entbindung',\n",
       " 'entfremdung',\n",
       " 'entführung',\n",
       " 'enthauptung',\n",
       " 'entlassung',\n",
       " 'entmutigung',\n",
       " 'enttäuschung',\n",
       " 'entziehung',\n",
       " 'epidemie',\n",
       " 'ermahnung',\n",
       " 'ermordung',\n",
       " 'ermüdung',\n",
       " 'erniedrigung',\n",
       " 'ernüchterung',\n",
       " 'erosion',\n",
       " 'erpressung',\n",
       " 'erschöpfung',\n",
       " 'erschütterung',\n",
       " 'explosion',\n",
       " 'fahrlässigkeit',\n",
       " 'farce',\n",
       " 'faschist',\n",
       " 'faulheit',\n",
       " 'fehde',\n",
       " 'fehlanzeige',\n",
       " 'fehler',\n",
       " 'fehlermeldung',\n",
       " 'fehlkauf',\n",
       " 'fehlkonstruktion',\n",
       " 'fehlleistung',\n",
       " 'fehltritt',\n",
       " 'fehlverhalten',\n",
       " 'feind',\n",
       " 'feuer',\n",
       " 'fiasko',\n",
       " 'finanzkrise',\n",
       " 'finte',\n",
       " 'flaute',\n",
       " 'fluch',\n",
       " 'flucht',\n",
       " 'flüchtiger',\n",
       " 'flüchtigkeit',\n",
       " 'flüchtling',\n",
       " 'frechheit',\n",
       " 'frust',\n",
       " 'frustration',\n",
       " 'furcht',\n",
       " 'fälschung',\n",
       " 'gammler',\n",
       " 'gangster',\n",
       " 'garnichts',\n",
       " 'gauner',\n",
       " 'gebrechen',\n",
       " 'gedränge',\n",
       " 'gefahr',\n",
       " 'gefecht',\n",
       " 'gefährdung',\n",
       " 'gegner',\n",
       " 'geisteskrankheit',\n",
       " 'geistlosigkeit',\n",
       " 'geiz',\n",
       " 'geizhals',\n",
       " 'geizkragen',\n",
       " 'geldstrafe',\n",
       " 'geschmacklosigkeit',\n",
       " 'geschäftsauflösung',\n",
       " 'gestank',\n",
       " 'gewalt',\n",
       " 'gewalttätige',\n",
       " 'gewalttätiger',\n",
       " 'gift',\n",
       " 'glanzlosigkeit',\n",
       " 'gleichgültigkeit',\n",
       " 'glücklosigkeit',\n",
       " 'gnadenlosigkeit',\n",
       " 'grausamkeit',\n",
       " 'groll',\n",
       " 'groteske',\n",
       " 'habgier',\n",
       " 'haft',\n",
       " 'handgemenge',\n",
       " 'handicap',\n",
       " 'hass',\n",
       " 'heimsuchung',\n",
       " 'hektik',\n",
       " 'hemmung',\n",
       " 'herausforderung',\n",
       " 'heuchelei',\n",
       " 'heuchler',\n",
       " 'hilflosigkeit',\n",
       " 'hindernis',\n",
       " 'hinterhalt',\n",
       " 'hinterlist',\n",
       " 'hohn',\n",
       " 'horror',\n",
       " 'hunger',\n",
       " 'hungersnot',\n",
       " 'hungertod',\n",
       " 'hysterie',\n",
       " 'härte',\n",
       " 'hölle',\n",
       " 'idiot',\n",
       " 'illegalität',\n",
       " 'immobilität',\n",
       " 'ineffizienz',\n",
       " 'infektion',\n",
       " 'infiltration',\n",
       " 'inflation',\n",
       " 'inkompetenz',\n",
       " 'inkonsequenz',\n",
       " 'inkonsistenz',\n",
       " 'instabilität',\n",
       " 'intervention',\n",
       " 'invasion',\n",
       " 'isolation',\n",
       " 'jammer',\n",
       " 'jähzorn',\n",
       " 'kampf',\n",
       " 'kapitalverbrechen',\n",
       " 'kapitulation',\n",
       " 'katastrophe',\n",
       " 'keim',\n",
       " 'klage',\n",
       " 'klischee',\n",
       " 'kläger',\n",
       " 'knappheit',\n",
       " 'kollaps',\n",
       " 'kollision',\n",
       " 'komplikation',\n",
       " 'konflikt',\n",
       " 'konfrontation',\n",
       " 'konjunkturrückgang',\n",
       " 'konkurrenz',\n",
       " 'konkurrenzkampf',\n",
       " 'konkurs',\n",
       " 'kontroverse',\n",
       " 'kopfschmerzen',\n",
       " 'korruption',\n",
       " 'kostspieligkeit',\n",
       " 'krach',\n",
       " 'krankheit',\n",
       " 'krieg',\n",
       " 'kriminalität',\n",
       " 'krise',\n",
       " 'kritik',\n",
       " 'kritiker',\n",
       " 'kränkung',\n",
       " 'krüppel',\n",
       " 'kurseinbruch',\n",
       " 'kälte',\n",
       " 'kündigung',\n",
       " 'kürzung',\n",
       " 'langeweile',\n",
       " 'langweiler',\n",
       " 'last',\n",
       " 'launenhaftigkeit',\n",
       " 'lebensgefahr',\n",
       " 'leblosigkeit',\n",
       " 'leere',\n",
       " 'leichtsinn',\n",
       " 'leichtsinnsfehler',\n",
       " 'leid',\n",
       " 'leidende',\n",
       " 'liquidation',\n",
       " 'lähmung',\n",
       " 'löschung',\n",
       " 'lüge',\n",
       " 'lügner',\n",
       " 'makel',\n",
       " 'mangel',\n",
       " 'manipulation',\n",
       " 'massaker',\n",
       " 'maßlosigkeit',\n",
       " 'melancholie',\n",
       " 'melodrama',\n",
       " 'merkwürdigkeit',\n",
       " 'minderung',\n",
       " 'minderwertigkeit',\n",
       " 'missachtung',\n",
       " 'missbrauch',\n",
       " 'missgeschick',\n",
       " 'missglück',\n",
       " 'misstrauen',\n",
       " 'misstrauensantrag',\n",
       " 'missverständnis',\n",
       " 'mist',\n",
       " 'mittellosigkeit',\n",
       " 'mittelmäßigkeit',\n",
       " 'monotonie',\n",
       " 'mord',\n",
       " 'mutlosigkeit',\n",
       " 'müdigkeit',\n",
       " 'mühe',\n",
       " 'müll',\n",
       " 'nachlässigkeit',\n",
       " 'nachteil',\n",
       " 'naivität',\n",
       " 'narr',\n",
       " 'negativität',\n",
       " 'neid',\n",
       " 'nervosität',\n",
       " 'neustart',\n",
       " 'niedergang',\n",
       " 'niedergeschlagenheit',\n",
       " 'niederlage',\n",
       " 'not',\n",
       " 'notfall',\n",
       " 'notstand',\n",
       " 'nutzlosigkeit',\n",
       " 'nötigung',\n",
       " 'oberflächlichkeit',\n",
       " 'offensive',\n",
       " 'opposition',\n",
       " 'panik',\n",
       " 'panne',\n",
       " 'pech',\n",
       " 'pessimismus',\n",
       " 'pest',\n",
       " 'pflicht',\n",
       " 'pleite',\n",
       " 'preissturz',\n",
       " 'problem',\n",
       " 'propaganda',\n",
       " 'protest',\n",
       " 'provisorium',\n",
       " 'provokation',\n",
       " 'qual',\n",
       " 'qualitätsminderung',\n",
       " 'rache',\n",
       " 'ratlosigkeit',\n",
       " 'raub',\n",
       " 'rebellen',\n",
       " 'rebellion',\n",
       " 'rechtswidrigkeit',\n",
       " 'redundanz',\n",
       " 'reinfall',\n",
       " 'reklamation',\n",
       " 'reparatur',\n",
       " 'revolte',\n",
       " 'revolution',\n",
       " 'rezession',\n",
       " 'risiko',\n",
       " 'rivale',\n",
       " 'rivalität',\n",
       " 'rost',\n",
       " 'ruin',\n",
       " 'rutsch',\n",
       " 'rätselraten',\n",
       " 'räuber',\n",
       " 'rückfall',\n",
       " 'rückgang',\n",
       " 'rückschritt',\n",
       " 'rücksendung',\n",
       " 'rücksichtslosigkeit',\n",
       " 'rückstand',\n",
       " 'rückständigkeit',\n",
       " 'rücktritt',\n",
       " 'rückzug',\n",
       " 'sabotage',\n",
       " 'sackgasse',\n",
       " 'schaden',\n",
       " 'schadensbild',\n",
       " 'scham',\n",
       " 'schande',\n",
       " 'scheidung',\n",
       " 'scheitern',\n",
       " 'schelte',\n",
       " 'scheußlichkeit',\n",
       " 'schlachtfeld',\n",
       " 'schlag',\n",
       " 'schlamperei',\n",
       " 'schlechtigkeit',\n",
       " 'schlitterbahn',\n",
       " 'schlägerei',\n",
       " 'schmerz',\n",
       " 'schmuggel',\n",
       " 'schmutz',\n",
       " 'schock',\n",
       " 'schramme',\n",
       " 'schreck',\n",
       " 'schrott',\n",
       " 'schräglauf',\n",
       " 'schubs',\n",
       " 'schuld',\n",
       " 'schuldner',\n",
       " 'schuldnerin',\n",
       " 'schurke',\n",
       " 'schwierigkeit',\n",
       " 'schwund',\n",
       " 'schwäche',\n",
       " 'schwächung',\n",
       " 'schäden',\n",
       " 'schädigung',\n",
       " 'senkung',\n",
       " 'sinnlosigkeit',\n",
       " 'sintflut',\n",
       " 'skandal',\n",
       " 'sklave',\n",
       " 'sklavenarbeit',\n",
       " 'sorge',\n",
       " 'spott',\n",
       " 'sprengstoff',\n",
       " 'sprengung',\n",
       " 'stagnation',\n",
       " 'stau',\n",
       " 'sterben',\n",
       " 'steuerhinterziehung',\n",
       " 'stilllegung',\n",
       " 'stillstand',\n",
       " 'stornierung',\n",
       " 'stoß',\n",
       " 'strafverfahren',\n",
       " 'strapaze',\n",
       " 'streik',\n",
       " 'streit',\n",
       " 'strenge',\n",
       " 'stress',\n",
       " 'streß',\n",
       " 'sturheit',\n",
       " 'sturz',\n",
       " 'störung',\n",
       " 'sucht',\n",
       " 'sündenbock',\n",
       " 'tabu',\n",
       " 'terror',\n",
       " 'terrorismus',\n",
       " 'teuerung',\n",
       " 'teuerungsrate',\n",
       " 'teufelskreis',\n",
       " 'tod',\n",
       " 'todesfall',\n",
       " 'todesstrafe',\n",
       " 'torheit',\n",
       " 'totschlag',\n",
       " 'tragödie',\n",
       " 'trauer',\n",
       " 'trauma',\n",
       " 'traurigkeit',\n",
       " 'trennung',\n",
       " 'tristesse',\n",
       " 'trostlosigkeit',\n",
       " 'trott',\n",
       " 'trugschluß',\n",
       " 'trägheit',\n",
       " 'träne',\n",
       " 'trübsal',\n",
       " 'turbolenz',\n",
       " 'turbolenzen',\n",
       " 'tyrannei',\n",
       " 'täuschung',\n",
       " 'umtausch',\n",
       " 'unbehagen',\n",
       " 'unbehaglichkeit',\n",
       " 'unbeliebtheit',\n",
       " 'unbequemlichkeit',\n",
       " 'unbestimmtheit',\n",
       " 'undankbarkeit',\n",
       " 'unehrlichkeit',\n",
       " 'uneinigkeit',\n",
       " 'unerbittlichkeit',\n",
       " 'unerträglichkeit',\n",
       " 'unfall',\n",
       " 'unfreundlichkeit',\n",
       " 'unfug',\n",
       " 'unfähigkeit',\n",
       " 'ungeduld',\n",
       " 'ungehorsamkeit',\n",
       " 'ungenauigkeit',\n",
       " 'ungerechtigkeit',\n",
       " 'ungeschicklichkeit',\n",
       " 'unglaubwürdigkeit',\n",
       " 'ungleichheit',\n",
       " 'unglück',\n",
       " 'unheilbarkeit',\n",
       " 'unhöflichkeit',\n",
       " 'unklarheit',\n",
       " 'unmenschlichkeit',\n",
       " 'unmoral',\n",
       " 'unmut',\n",
       " 'unordnung',\n",
       " 'unrecht',\n",
       " 'unregelmäßigkeit',\n",
       " 'unrentabilität',\n",
       " 'unruhe',\n",
       " 'unsicherheit',\n",
       " 'unsinn',\n",
       " 'unstetigkeit',\n",
       " 'unstimmigkeit',\n",
       " 'unterbrechung',\n",
       " 'unterdrückung',\n",
       " 'untergang',\n",
       " 'unterlassung',\n",
       " 'unterwerfung',\n",
       " 'untreue',\n",
       " 'unverantwortlichkeit',\n",
       " 'unvereinbarkeit',\n",
       " 'unverhältnismäßigkeit',\n",
       " 'unverschämtheit',\n",
       " 'unvollkommenheit',\n",
       " 'unvollständigkeit',\n",
       " 'unwahrheit',\n",
       " 'unwirksamkeit',\n",
       " 'unwirtschaftlichkeit',\n",
       " 'unwissenheit',\n",
       " 'unzufriedenheit',\n",
       " 'unzumutbarkeit',\n",
       " 'unzuverlässigkeit',\n",
       " 'vagheit',\n",
       " 'verachtung',\n",
       " 'verbannung',\n",
       " 'verbot',\n",
       " 'verdacht',\n",
       " 'verdorbenheit',\n",
       " 'verdrängung',\n",
       " 'verdächtige',\n",
       " 'verfall',\n",
       " 'verfehlung',\n",
       " 'vergeltung',\n",
       " 'vergeltungsmaßnahme',\n",
       " 'vergeudung',\n",
       " 'verherrlichung',\n",
       " 'verhängnis',\n",
       " 'verkleinerung',\n",
       " 'verlangsamung',\n",
       " 'verletzung',\n",
       " 'verleumdung',\n",
       " 'verlierer',\n",
       " 'verlust',\n",
       " 'vermeidung',\n",
       " 'verminderung',\n",
       " 'vernachlässigung',\n",
       " 'vernichtung',\n",
       " 'verrat',\n",
       " 'verräter',\n",
       " 'verrückter',\n",
       " 'verrücktheit',\n",
       " 'versagen',\n",
       " 'verschlechterung',\n",
       " 'verschmutzung',\n",
       " 'verschwendung',\n",
       " 'verschwörung',\n",
       " 'versenkung',\n",
       " 'versklavung',\n",
       " 'verstoß',\n",
       " 'verstrickung',\n",
       " 'versuchung',\n",
       " 'versäumnis',\n",
       " 'vertreibung',\n",
       " 'verurteilung',\n",
       " 'verweigerung',\n",
       " 'verwerfung',\n",
       " 'verwirrung',\n",
       " 'verwundung',\n",
       " 'verwüstung',\n",
       " 'verzerrung',\n",
       " 'verzicht',\n",
       " 'verzweiflung',\n",
       " 'verzögerung',\n",
       " 'vorurteil',\n",
       " 'vorwand',\n",
       " 'vorwurf',\n",
       " 'wahnsinn',\n",
       " 'wermutstropfen',\n",
       " 'wertlosigkeit',\n",
       " 'wertverlust',\n",
       " 'wichtigtuer',\n",
       " 'widernatürlichkeit',\n",
       " 'widerruf',\n",
       " 'widerspruch',\n",
       " 'widrigkeit',\n",
       " 'wirtschaftskrise',\n",
       " 'wrack',\n",
       " 'wunde',\n",
       " 'wut',\n",
       " 'wüste',\n",
       " 'zahlungsunfähigkeit',\n",
       " 'zeitverschwendung',\n",
       " 'zensur',\n",
       " 'zerrung',\n",
       " 'zerschlagung',\n",
       " 'zerstörung',\n",
       " 'ziellosigkeit',\n",
       " 'zoll',\n",
       " 'zorn',\n",
       " 'zumutung',\n",
       " 'zusammenbruch',\n",
       " 'zusammenstoß',\n",
       " 'zwang',\n",
       " 'zwangslage',\n",
       " 'zwangsmaßnahmen',\n",
       " 'zweifel',\n",
       " 'zwietracht',\n",
       " 'abbauen',\n",
       " 'abbrechen',\n",
       " 'abdanken',\n",
       " 'abdämpfen',\n",
       " 'abfallen',\n",
       " 'abfällig',\n",
       " 'abführen',\n",
       " 'abgebrochen',\n",
       " 'abgedroschen',\n",
       " 'abgestanden',\n",
       " 'abgetakelt',\n",
       " 'abgleiten',\n",
       " 'abgründig',\n",
       " 'abhängig',\n",
       " 'ablaufen',\n",
       " 'ablehnen',\n",
       " 'ablenken',\n",
       " 'abnehmen',\n",
       " 'abnutzen',\n",
       " 'abraten',\n",
       " 'abreissen',\n",
       " 'abrupt',\n",
       " 'abrutschen',\n",
       " 'abschaffen',\n",
       " 'abschießen',\n",
       " 'abschrecken',\n",
       " 'abschreckend',\n",
       " 'abschreiben',\n",
       " 'abschwächen',\n",
       " 'abschätzig',\n",
       " 'absenken',\n",
       " 'absinken',\n",
       " 'abspalten',\n",
       " 'absperren',\n",
       " 'absteigen',\n",
       " 'abstoßen',\n",
       " 'abstoßend',\n",
       " 'abstumpfen',\n",
       " 'abstürzen',\n",
       " 'absurd',\n",
       " 'abtragen',\n",
       " 'abweichen',\n",
       " 'abweisen',\n",
       " 'abwerten',\n",
       " 'achtlos',\n",
       " 'aggressiv',\n",
       " 'alarmieren',\n",
       " 'alkoholisiert',\n",
       " 'alt',\n",
       " 'altmodisch',\n",
       " 'amateurhaft',\n",
       " 'ambivalent',\n",
       " 'androhen',\n",
       " 'anfällig',\n",
       " 'angespannt',\n",
       " 'angestrengt',\n",
       " 'angetrunken',\n",
       " 'angreifen',\n",
       " 'anklagen',\n",
       " 'anmaßen',\n",
       " 'annullieren',\n",
       " 'anschießen',\n",
       " 'anspannen',\n",
       " 'anstrengen',\n",
       " 'anstrengend',\n",
       " 'anstößig',\n",
       " 'antiquiert',\n",
       " 'anzünden',\n",
       " 'apathisch',\n",
       " 'apokalyptisch',\n",
       " 'arbeitslos',\n",
       " 'archaisch',\n",
       " 'arm',\n",
       " 'armselig',\n",
       " 'arrogant',\n",
       " 'attackieren',\n",
       " 'aufblasen',\n",
       " 'aufblähen',\n",
       " 'aufbringen',\n",
       " 'auffallen',\n",
       " 'aufgeben',\n",
       " 'aufgebracht',\n",
       " 'aufgeregt',\n",
       " 'aufhören',\n",
       " 'auflösen',\n",
       " 'aufregen',\n",
       " 'aufreibend',\n",
       " 'aufrühren',\n",
       " 'aufschlagen',\n",
       " 'aufschreien',\n",
       " 'aufwühlen',\n",
       " 'ausbeuten',\n",
       " 'ausbrechen',\n",
       " 'auseinanderfallen',\n",
       " 'auseinandersetzen',\n",
       " 'ausfallen',\n",
       " 'ausgehungert',\n",
       " 'ausgestorben',\n",
       " 'ausgleiten',\n",
       " 'ausgrenzen',\n",
       " 'ausradieren',\n",
       " 'ausrotten',\n",
       " 'ausschalten',\n",
       " 'ausschließen',\n",
       " 'aussetzen',\n",
       " 'aussichtslos',\n",
       " 'aussterben',\n",
       " 'banal',\n",
       " 'barbarisch',\n",
       " 'beanstandet',\n",
       " 'bedauerlich',\n",
       " 'bedauern',\n",
       " 'bedauernswert',\n",
       " 'bedenklich',\n",
       " 'bedeppert',\n",
       " 'bedeutungslos',\n",
       " 'bedrohen',\n",
       " 'bedrohlich',\n",
       " 'bedrängen',\n",
       " 'bedrücken',\n",
       " 'bedrückt',\n",
       " 'bedürftig',\n",
       " 'beeinträchtigen',\n",
       " 'beenden',\n",
       " 'befallen',\n",
       " 'befangen',\n",
       " 'befremdlich',\n",
       " 'befürchten',\n",
       " 'begrenzen',\n",
       " 'begrenzt',\n",
       " 'begriffsstutzig',\n",
       " 'behindern',\n",
       " 'behämmert',\n",
       " 'beklagen',\n",
       " 'beklagenswert',\n",
       " 'bekloppt',\n",
       " 'beknackt',\n",
       " 'bekümmert',\n",
       " 'belanglos',\n",
       " 'belasten',\n",
       " 'beleidigen',\n",
       " 'beleidigend',\n",
       " 'beleidigt',\n",
       " 'belästigen',\n",
       " 'berauben',\n",
       " 'bergab',\n",
       " 'bescheuert',\n",
       " 'beschissen',\n",
       " 'beschneiden',\n",
       " 'beschruppt',\n",
       " 'beschränken',\n",
       " 'beschränkt',\n",
       " 'beschuldigen',\n",
       " 'beschweren',\n",
       " 'beschwerlich',\n",
       " 'beschädigen',\n",
       " 'beschäftigungslos',\n",
       " 'beschämen',\n",
       " 'beseitigen',\n",
       " 'besorgniserregend',\n",
       " 'besorgt',\n",
       " 'bestechen',\n",
       " 'besteuern',\n",
       " 'bestrafen',\n",
       " 'bestürzt',\n",
       " 'betrunken',\n",
       " 'betrügen',\n",
       " 'betrügerisch',\n",
       " 'betäuben',\n",
       " 'beunruhigen',\n",
       " 'beunruhigend',\n",
       " 'beunruhigt',\n",
       " 'bevormunden',\n",
       " 'bewegungslos',\n",
       " 'beängstigend',\n",
       " 'billig',\n",
       " 'bitter',\n",
       " 'bizarr',\n",
       " 'blauäugig',\n",
       " 'blind',\n",
       " 'blockieren',\n",
       " 'blutig',\n",
       " 'blöd',\n",
       " 'blöde',\n",
       " 'bombardieren',\n",
       " 'borniert',\n",
       " 'boshaft',\n",
       " 'brechen',\n",
       " 'brennen',\n",
       " 'brisant',\n",
       " 'brutal',\n",
       " 'brüchig',\n",
       " 'bösartig',\n",
       " 'böse',\n",
       " 'chaotisch',\n",
       " 'charakterschwach',\n",
       " 'dahinschwinden',\n",
       " 'debil',\n",
       " 'defekt',\n",
       " 'defizitär',\n",
       " 'deformieren',\n",
       " 'degradieren',\n",
       " 'deinstallieren',\n",
       " 'deinstalliert',\n",
       " 'dekadent',\n",
       " 'demütigen',\n",
       " 'denkfaul',\n",
       " 'depressiv',\n",
       " 'desinteressiert',\n",
       " 'desolat',\n",
       " 'destruktiv',\n",
       " 'dezimieren',\n",
       " 'diffamieren',\n",
       " 'diffizil',\n",
       " 'diffus',\n",
       " 'diktatorisch',\n",
       " 'dilettantisch',\n",
       " 'diskreditieren',\n",
       " 'distanziert',\n",
       " 'disziplinlos',\n",
       " 'dominieren',\n",
       " 'doof',\n",
       " 'dramatisch',\n",
       " 'drastisch',\n",
       " 'dreckig',\n",
       " 'dreist',\n",
       " 'driften',\n",
       " 'drohen',\n",
       " 'drosseln',\n",
       " 'drängelnd',\n",
       " 'drängen',\n",
       " 'dumm',\n",
       " 'dunkel',\n",
       " 'durchfallen',\n",
       " 'dusselig',\n",
       " 'dämlich',\n",
       " 'dämpfen',\n",
       " 'dürr',\n",
       " 'düster',\n",
       " 'egoistisch',\n",
       " 'ehebrechen',\n",
       " 'eigenartig',\n",
       " 'einbehalten',\n",
       " 'einbrechen',\n",
       " 'einfältig',\n",
       " 'eingehen',\n",
       " 'eingeschränkt',\n",
       " 'einsam',\n",
       " 'einschlagen',\n",
       " 'einschrumpfen',\n",
       " 'einschränken',\n",
       " 'einschüchtern',\n",
       " 'einschüchternd',\n",
       " 'einsinken',\n",
       " 'einstellen',\n",
       " 'einstürzen',\n",
       " 'eintönig',\n",
       " 'ekelerregend',\n",
       " 'ekelig',\n",
       " 'eklatant',\n",
       " 'elend',\n",
       " 'elendig',\n",
       " 'empören',\n",
       " 'energielos',\n",
       " 'engstirnig',\n",
       " 'entbehrungsreich',\n",
       " 'entbinden',\n",
       " 'entfremden',\n",
       " 'entführen',\n",
       " 'entgleiten',\n",
       " 'enthaupten',\n",
       " 'entkräftet',\n",
       " 'entlassen',\n",
       " 'entmutigen',\n",
       " 'entnervt',\n",
       " 'entrüstet',\n",
       " 'entschwinden',\n",
       " 'entsetzlich',\n",
       " 'enttäuschen',\n",
       " 'enttäuschend',\n",
       " 'enttäuscht',\n",
       " 'entwürdigend',\n",
       " 'entziehen',\n",
       " 'erbittert',\n",
       " 'erbost',\n",
       " 'erbrechen',\n",
       " 'erbärmlich',\n",
       " 'erdrückend',\n",
       " 'ergaunern',\n",
       " 'ergebnislos',\n",
       " 'erleiden',\n",
       " 'erliegen',\n",
       " 'ermahnen',\n",
       " 'ermorden',\n",
       " 'ermüden',\n",
       " 'erniedrigen',\n",
       " 'ernüchternd',\n",
       " 'erpressen',\n",
       " 'erschießen',\n",
       " 'erschlaffen',\n",
       " 'erschlagen',\n",
       " 'erschrecken',\n",
       " 'erschreckend',\n",
       " 'erschweren',\n",
       " 'erschöpfen',\n",
       " 'erschöpft',\n",
       " 'erschüttern',\n",
       " 'erschütternd',\n",
       " 'erschüttert',\n",
       " 'erstechen',\n",
       " 'ersticken',\n",
       " 'ertrinken',\n",
       " 'erwürgen',\n",
       " 'erzürnt',\n",
       " 'existenzbedrohend',\n",
       " 'explodieren',\n",
       " 'fad',\n",
       " 'fadenscheinig',\n",
       " 'fahrlässig',\n",
       " 'fallen',\n",
       " 'falsch',\n",
       " 'farblos',\n",
       " 'faschistisch',\n",
       " 'fatal',\n",
       " 'faul',\n",
       " 'fehlen',\n",
       " ...]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Flatten the nested sentiment word lists into one flat list each.\n",
    "# NOTE(review): assumes positive_list / negative_list are lists of word lists, as the displayed output suggests.\n",
    "from functools import reduce  # python 3; kept so any later cell that relies on `reduce` still finds it\n",
    "from itertools import chain\n",
    "\n",
    "# chain.from_iterable flattens in linear time and yields [] for an empty input, whereas\n",
    "# reduce(lambda x, y: x + y, ...) copies the accumulator at every step and raises on an empty list.\n",
    "positive_list1 = list(chain.from_iterable(positive_list))\n",
    "negative_list1 = list(chain.from_iterable(negative_list))\n",
    "\n",
    "negative_list1\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load and process data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SPD - refugee relevant"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Rechte der Flüchtlinge wahren</td>\n",
       "      <td>10.12.2013</td>\n",
       "      <td>Kerstin Griese, Berichterstatterin: Die Europä...</td>\n",
       "      <td>[0.60109723]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Flüchtlinge brauchen weiterhin unsere Hilfe</td>\n",
       "      <td>17.06.2016</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.5769732]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Flucht und Vertreibung aktueller denn je</td>\n",
       "      <td>19.06.2016</td>\n",
       "      <td>Hiltrud Lotze, zuständige Berichterstatterin: ...</td>\n",
       "      <td>[0.57330569]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BKA-Zahlen zu Straftaten gegen Asylunterkünfte...</td>\n",
       "      <td>02.08.2016</td>\n",
       "      <td>Eva Högl, stellvertretende Fraktionsvorsitzend...</td>\n",
       "      <td>[0.56408847]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Für eine humane Flüchtlingspolitik der EU</td>\n",
       "      <td>19.06.2015</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.5527452]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>268</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title        date  \\\n",
       "0                      Rechte der Flüchtlinge wahren  10.12.2013   \n",
       "1        Flüchtlinge brauchen weiterhin unsere Hilfe  17.06.2016   \n",
       "2           Flucht und Vertreibung aktueller denn je  19.06.2016   \n",
       "3  BKA-Zahlen zu Straftaten gegen Asylunterkünfte...  02.08.2016   \n",
       "4          Für eine humane Flüchtlingspolitik der EU  19.06.2015   \n",
       "\n",
       "                                             content         score  \\\n",
       "0  Kerstin Griese, Berichterstatterin: Die Europä...  [0.60109723]   \n",
       "1  Frank Schwabe, menschenrechtspolitischer Sprec...   [0.5769732]   \n",
       "2  Hiltrud Lotze, zuständige Berichterstatterin: ...  [0.57330569]   \n",
       "3  Eva Högl, stellvertretende Fraktionsvorsitzend...  [0.56408847]   \n",
       "4  Frank Schwabe, menschenrechtspolitischer Sprec...   [0.5527452]   \n",
       "\n",
       "     how_included  n_words  \n",
       "0  OVER-THRESHOLD      212  \n",
       "1  OVER-THRESHOLD      224  \n",
       "2  OVER-THRESHOLD      235  \n",
       "3  OVER-THRESHOLD      198  \n",
       "4  OVER-THRESHOLD      268  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the refugee-relevant SPD documents (tab-separated file without a header row).\n",
    "# pandas 1.3 replaced `error_bad_lines=False` with `on_bad_lines` and pandas 2.0 removed the old keyword,\n",
    "# so pick the spelling the installed pandas understands. 'warn' matches the old behaviour of\n",
    "# `error_bad_lines=False` on its own: malformed rows are skipped and a warning is emitted for each.\n",
    "_pd_version = tuple(int(part) for part in pd.__version__.split('.')[:2])\n",
    "_bad_row_kwargs = {'on_bad_lines': 'warn'} if _pd_version >= (1, 3) else {'error_bad_lines': False}\n",
    "spd_rel = pd.read_csv(\n",
    "    \"/Users/ashrakatelshehawy/spd_refugeerelevant.csv\",\n",
    "    header=None,\n",
    "    encoding='utf-8',\n",
    "    delimiter='\\t',\n",
    "    **_bad_row_kwargs\n",
    ")\n",
    "# The file has no header row, so label the six columns by hand.\n",
    "spd_rel.columns = ['title', 'date', \"content\", \"score\", \"how_included\", \"n_words\"]\n",
    "spd_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>102</th>\n",
       "      <td>Rechtspopulismus ist nicht salonfähig – kein P...</td>\n",
       "      <td>19.03.2015</td>\n",
       "      <td>Josip Juratovic, Integrationsbeauftragter;Susa...</td>\n",
       "      <td>[0.40215556]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>Griechenland braucht in Flüchtlingskrise die v...</td>\n",
       "      <td>01.04.2016</td>\n",
       "      <td>Frank Schwabe, Sprecher für Menschenrechte und...</td>\n",
       "      <td>[0.37250462]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>271</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Schutzkonzepte in Asylunterkünften müssen Stan...</td>\n",
       "      <td>16.03.2016</td>\n",
       "      <td>Sönke Rix, Sprecher der Arbeitsgruppe Familie,...</td>\n",
       "      <td>[0.53912225]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>Erleichterung der Kontoeröffnung für Flüchtlin...</td>\n",
       "      <td>03.09.2015</td>\n",
       "      <td>Jens Zimmermann, zuständiger Berichterstatter:...</td>\n",
       "      <td>[0.43409866]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>262</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154</th>\n",
       "      <td>Türkei: Gewalt beenden, Wahlen sichern, demokr...</td>\n",
       "      <td>12.10.2015</td>\n",
       "      <td>Michelle Müntefering, zuständige Berichterstat...</td>\n",
       "      <td>[0.36289514]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>Aus Folter-Vorwürfen in Deutschland Konsequenz...</td>\n",
       "      <td>25.06.2015</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.43957967]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>170</th>\n",
       "      <td>Kabinett: Mitwirkungspflichten im Widerrufs- u...</td>\n",
       "      <td>01.08.2018</td>\n",
       "      <td>Eva Högl, stellvertretende Fraktionsvorsitzend...</td>\n",
       "      <td>[0.35258447]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>Meilenstein in der Flüchtlingspolitik: Integra...</td>\n",
       "      <td>25.05.2016</td>\n",
       "      <td>Katja Mast, arbeits- und sozialpolitische Spre...</td>\n",
       "      <td>[0.45058358]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>307</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>Flüchtlingskinder besser schützen</td>\n",
       "      <td>21.09.2015</td>\n",
       "      <td>Sönke Rix, Sprecher der Arbeitsgruppe Familie,...</td>\n",
       "      <td>[0.46894189]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>142</th>\n",
       "      <td>Nationaler Bildungsbericht: Jetzt gemeinsam fü...</td>\n",
       "      <td>16.06.2016</td>\n",
       "      <td>Ernst Dieter Rossmann, Sprecher der Arbeitsgru...</td>\n",
       "      <td>[0.37360209]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>269</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>Menschenhandel entschieden bekämpfen</td>\n",
       "      <td>29.07.2019</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.42202814]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>133</th>\n",
       "      <td>Europa muss handeln: Odyssee auf dem Mittelmee...</td>\n",
       "      <td>09.01.2019</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.37968588]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>Fluchtursachen bekämpfen – Zukunftsperspektive...</td>\n",
       "      <td>28.07.2017</td>\n",
       "      <td>Stefan Rebmann, entwicklungspolitischer Sprech...</td>\n",
       "      <td>[0.46417272]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92</th>\n",
       "      <td>Nein zu Pflichtdiensten, ja zu mehr Stellen fü...</td>\n",
       "      <td>03.11.2015</td>\n",
       "      <td>Sönke Rix, Sprecher der Arbeitsgruppe Familie,...</td>\n",
       "      <td>[0.41800914]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>207</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>124</th>\n",
       "      <td>Europa muss ein Europa ohne Zäune sein</td>\n",
       "      <td>19.06.2015</td>\n",
       "      <td>Detlef Müller, zuständiger Berichterstatter;Ch...</td>\n",
       "      <td>[0.38465508]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>Flüchtlinge und Vertriebene brauchen unsere Hilfe</td>\n",
       "      <td>19.06.2014</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.45647007]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>152</th>\n",
       "      <td>Folgen des Klimawandels werden zur Fluchtursache</td>\n",
       "      <td>27.11.2015</td>\n",
       "      <td>Bärbel Kofler, Sprecherin der Arbeitsgruppe Wi...</td>\n",
       "      <td>[0.36417856]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>Friedensdialog in Burundi fördern</td>\n",
       "      <td>21.10.2016</td>\n",
       "      <td>Gabriela Heinrich, stellvertretende menschenre...</td>\n",
       "      <td>[0.44707988]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>262</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Unbegleiteten minderjährigen Flüchtlingen gute...</td>\n",
       "      <td>15.07.2015</td>\n",
       "      <td>Sönke Rix, Sprecher der Arbeitsgruppe Familie,...</td>\n",
       "      <td>[0.50327557]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>144</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>Wochen gegen Rassismus – 365 Tage Gelegenheit ...</td>\n",
       "      <td>15.03.2015</td>\n",
       "      <td>Susann Rüthrich, Sprecherin der AG Strategien ...</td>\n",
       "      <td>[0.45899099]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>Das neue Integrationsgesetz: Großer Fortschrit...</td>\n",
       "      <td>03.06.2016</td>\n",
       "      <td>Kerstin Griese, zuständige Berichterstatterin;...</td>\n",
       "      <td>[0.45456641]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>107</th>\n",
       "      <td>Weltkindertag – 28 Millionen Kinder auf der Fl...</td>\n",
       "      <td>19.09.2016</td>\n",
       "      <td>Stefan Rebmann, entwicklungspolitischer Sprech...</td>\n",
       "      <td>[0.39687076]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Vorsätzliche Angriffe auf Hilfskonvois sind ei...</td>\n",
       "      <td>20.09.2016</td>\n",
       "      <td>Frank Schwabe, Sprecher für Menschenrechte und...</td>\n",
       "      <td>[0.47296591]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>Deutsch-italienische Parlamentarierinitiative ...</td>\n",
       "      <td>22.04.2015</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.38360115]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Flüchtlinge brauchen weiterhin unsere Hilfe</td>\n",
       "      <td>17.06.2016</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.5769732]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 title        date  \\\n",
       "102  Rechtspopulismus ist nicht salonfähig – kein P...  19.03.2015   \n",
       "144  Griechenland braucht in Flüchtlingskrise die v...  01.04.2016   \n",
       "7    Schutzkonzepte in Asylunterkünften müssen Stan...  16.03.2016   \n",
       "79   Erleichterung der Kontoeröffnung für Flüchtlin...  03.09.2015   \n",
       "154  Türkei: Gewalt beenden, Wahlen sichern, demokr...  12.10.2015   \n",
       "71   Aus Folter-Vorwürfen in Deutschland Konsequenz...  25.06.2015   \n",
       "170  Kabinett: Mitwirkungspflichten im Widerrufs- u...  01.08.2018   \n",
       "61   Meilenstein in der Flüchtlingspolitik: Integra...  25.05.2016   \n",
       "37                   Flüchtlingskinder besser schützen  21.09.2015   \n",
       "142  Nationaler Bildungsbericht: Jetzt gemeinsam fü...  16.06.2016   \n",
       "90                Menschenhandel entschieden bekämpfen  29.07.2019   \n",
       "133  Europa muss handeln: Odyssee auf dem Mittelmee...  09.01.2019   \n",
       "44   Fluchtursachen bekämpfen – Zukunftsperspektive...  28.07.2017   \n",
       "92   Nein zu Pflichtdiensten, ja zu mehr Stellen fü...  03.11.2015   \n",
       "124             Europa muss ein Europa ohne Zäune sein  19.06.2015   \n",
       "52   Flüchtlinge und Vertriebene brauchen unsere Hilfe  19.06.2014   \n",
       "152   Folgen des Klimawandels werden zur Fluchtursache  27.11.2015   \n",
       "63                   Friedensdialog in Burundi fördern  21.10.2016   \n",
       "16   Unbegleiteten minderjährigen Flüchtlingen gute...  15.07.2015   \n",
       "48   Wochen gegen Rassismus – 365 Tage Gelegenheit ...  15.03.2015   \n",
       "54   Das neue Integrationsgesetz: Großer Fortschrit...  03.06.2016   \n",
       "107  Weltkindertag – 28 Millionen Kinder auf der Fl...  19.09.2016   \n",
       "29   Vorsätzliche Angriffe auf Hilfskonvois sind ei...  20.09.2016   \n",
       "126  Deutsch-italienische Parlamentarierinitiative ...  22.04.2015   \n",
       "1          Flüchtlinge brauchen weiterhin unsere Hilfe  17.06.2016   \n",
       "\n",
       "                                               content         score  \\\n",
       "102  Josip Juratovic, Integrationsbeauftragter;Susa...  [0.40215556]   \n",
       "144  Frank Schwabe, Sprecher für Menschenrechte und...  [0.37250462]   \n",
       "7    Sönke Rix, Sprecher der Arbeitsgruppe Familie,...  [0.53912225]   \n",
       "79   Jens Zimmermann, zuständiger Berichterstatter:...  [0.43409866]   \n",
       "154  Michelle Müntefering, zuständige Berichterstat...  [0.36289514]   \n",
       "71   Frank Schwabe, menschenrechtspolitischer Sprec...  [0.43957967]   \n",
       "170  Eva Högl, stellvertretende Fraktionsvorsitzend...  [0.35258447]   \n",
       "61   Katja Mast, arbeits- und sozialpolitische Spre...  [0.45058358]   \n",
       "37   Sönke Rix, Sprecher der Arbeitsgruppe Familie,...  [0.46894189]   \n",
       "142  Ernst Dieter Rossmann, Sprecher der Arbeitsgru...  [0.37360209]   \n",
       "90   Frank Schwabe, menschenrechtspolitischer Sprec...  [0.42202814]   \n",
       "133  Frank Schwabe, menschenrechtspolitischer Sprec...  [0.37968588]   \n",
       "44   Stefan Rebmann, entwicklungspolitischer Sprech...  [0.46417272]   \n",
       "92   Sönke Rix, Sprecher der Arbeitsgruppe Familie,...  [0.41800914]   \n",
       "124  Detlef Müller, zuständiger Berichterstatter;Ch...  [0.38465508]   \n",
       "52   Frank Schwabe, menschenrechtspolitischer Sprec...  [0.45647007]   \n",
       "152  Bärbel Kofler, Sprecherin der Arbeitsgruppe Wi...  [0.36417856]   \n",
       "63   Gabriela Heinrich, stellvertretende menschenre...  [0.44707988]   \n",
       "16   Sönke Rix, Sprecher der Arbeitsgruppe Familie,...  [0.50327557]   \n",
       "48   Susann Rüthrich, Sprecherin der AG Strategien ...  [0.45899099]   \n",
       "54   Kerstin Griese, zuständige Berichterstatterin;...  [0.45456641]   \n",
       "107  Stefan Rebmann, entwicklungspolitischer Sprech...  [0.39687076]   \n",
       "29   Frank Schwabe, Sprecher für Menschenrechte und...  [0.47296591]   \n",
       "126  Frank Schwabe, menschenrechtspolitischer Sprec...  [0.38360115]   \n",
       "1    Frank Schwabe, menschenrechtspolitischer Sprec...   [0.5769732]   \n",
       "\n",
       "       how_included  n_words  \n",
       "102  OVER-THRESHOLD      166  \n",
       "144  OVER-THRESHOLD      271  \n",
       "7    OVER-THRESHOLD      130  \n",
       "79   OVER-THRESHOLD      262  \n",
       "154  OVER-THRESHOLD      294  \n",
       "71   OVER-THRESHOLD      255  \n",
       "170  OVER-THRESHOLD      171  \n",
       "61   OVER-THRESHOLD      307  \n",
       "37   OVER-THRESHOLD      206  \n",
       "142  OVER-THRESHOLD      269  \n",
       "90   OVER-THRESHOLD      206  \n",
       "133  OVER-THRESHOLD      197  \n",
       "44   OVER-THRESHOLD      197  \n",
       "92   OVER-THRESHOLD      207  \n",
       "124  OVER-THRESHOLD      248  \n",
       "52   OVER-THRESHOLD      300  \n",
       "152  OVER-THRESHOLD      248  \n",
       "63   OVER-THRESHOLD      262  \n",
       "16   OVER-THRESHOLD      144  \n",
       "48   OVER-THRESHOLD      268  \n",
       "54   OVER-THRESHOLD      292  \n",
       "107  OVER-THRESHOLD      282  \n",
       "29   OVER-THRESHOLD      224  \n",
       "126  OVER-THRESHOLD      187  \n",
       "1    OVER-THRESHOLD      224  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check how many SPD documents were loaded; printed because only a cell's last expression is displayed.\n",
    "print(len(spd_rel))\n",
    "# Take a random sample of 25 documents; the fixed seed makes a re-run draw the same rows.\n",
    "spd_rel_sample = spd_rel.sample(25, random_state=42)\n",
    "spd_rel_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Rechte der Flüchtlinge wahren</td>\n",
       "      <td>10.12.2013</td>\n",
       "      <td>Kerstin Griese, Berichterstatterin: Die Europä...</td>\n",
       "      <td>[0.60109723]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Flüchtlinge brauchen weiterhin unsere Hilfe</td>\n",
       "      <td>17.06.2016</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.5769732]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Flucht und Vertreibung aktueller denn je</td>\n",
       "      <td>19.06.2016</td>\n",
       "      <td>Hiltrud Lotze, zuständige Berichterstatterin: ...</td>\n",
       "      <td>[0.57330569]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BKA-Zahlen zu Straftaten gegen Asylunterkünfte...</td>\n",
       "      <td>02.08.2016</td>\n",
       "      <td>Eva Högl, stellvertretende Fraktionsvorsitzend...</td>\n",
       "      <td>[0.56408847]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Für eine humane Flüchtlingspolitik der EU</td>\n",
       "      <td>19.06.2015</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.5527452]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>268</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title        date  \\\n",
       "0                      Rechte der Flüchtlinge wahren  10.12.2013   \n",
       "1        Flüchtlinge brauchen weiterhin unsere Hilfe  17.06.2016   \n",
       "2           Flucht und Vertreibung aktueller denn je  19.06.2016   \n",
       "3  BKA-Zahlen zu Straftaten gegen Asylunterkünfte...  02.08.2016   \n",
       "4          Für eine humane Flüchtlingspolitik der EU  19.06.2015   \n",
       "\n",
       "                                             content         score  \\\n",
       "0  Kerstin Griese, Berichterstatterin: Die Europä...  [0.60109723]   \n",
       "1  Frank Schwabe, menschenrechtspolitischer Sprec...   [0.5769732]   \n",
       "2  Hiltrud Lotze, zuständige Berichterstatterin: ...  [0.57330569]   \n",
       "3  Eva Högl, stellvertretende Fraktionsvorsitzend...  [0.56408847]   \n",
       "4  Frank Schwabe, menschenrechtspolitischer Sprec...   [0.5527452]   \n",
       "\n",
       "     how_included  n_words  \n",
       "0  OVER-THRESHOLD      212  \n",
       "1  OVER-THRESHOLD      224  \n",
       "2  OVER-THRESHOLD      235  \n",
       "3  OVER-THRESHOLD      198  \n",
       "4  OVER-THRESHOLD      268  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assign the six column labels to the SPD frame and preview the first rows.\n",
    "spd_rel.columns = [\n",
    "    \"title\",\n",
    "    \"date\",\n",
    "    \"content\",\n",
    "    \"score\",\n",
    "    \"how_included\",\n",
    "    \"n_words\",\n",
    "]\n",
    "spd_rel.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [title, date, content, score, how_included, n_words]\n",
       "Index: []"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect every row that has at least one missing value; an empty frame means nothing is missing.\n",
    "spd_rel_nan = spd_rel.loc[spd_rel.isnull().any(axis=\"columns\")]\n",
    "spd_rel_nan"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Rechte der Flüchtlinge wahren</td>\n",
       "      <td>122013</td>\n",
       "      <td>Kerstin Griese, Berichterstatterin: Die Europä...</td>\n",
       "      <td>[0.60109723]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Flüchtlinge brauchen weiterhin unsere Hilfe</td>\n",
       "      <td>062016</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.5769732]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Flucht und Vertreibung aktueller denn je</td>\n",
       "      <td>062016</td>\n",
       "      <td>Hiltrud Lotze, zuständige Berichterstatterin: ...</td>\n",
       "      <td>[0.57330569]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BKA-Zahlen zu Straftaten gegen Asylunterkünfte...</td>\n",
       "      <td>082016</td>\n",
       "      <td>Eva Högl, stellvertretende Fraktionsvorsitzend...</td>\n",
       "      <td>[0.56408847]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Für eine humane Flüchtlingspolitik der EU</td>\n",
       "      <td>062015</td>\n",
       "      <td>Frank Schwabe, menschenrechtspolitischer Sprec...</td>\n",
       "      <td>[0.5527452]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>268</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title    date  \\\n",
       "0                      Rechte der Flüchtlinge wahren  122013   \n",
       "1        Flüchtlinge brauchen weiterhin unsere Hilfe  062016   \n",
       "2           Flucht und Vertreibung aktueller denn je  062016   \n",
       "3  BKA-Zahlen zu Straftaten gegen Asylunterkünfte...  082016   \n",
       "4          Für eine humane Flüchtlingspolitik der EU  062015   \n",
       "\n",
       "                                             content         score  \\\n",
       "0  Kerstin Griese, Berichterstatterin: Die Europä...  [0.60109723]   \n",
       "1  Frank Schwabe, menschenrechtspolitischer Sprec...   [0.5769732]   \n",
       "2  Hiltrud Lotze, zuständige Berichterstatterin: ...  [0.57330569]   \n",
       "3  Eva Högl, stellvertretende Fraktionsvorsitzend...  [0.56408847]   \n",
       "4  Frank Schwabe, menschenrechtspolitischer Sprec...   [0.5527452]   \n",
       "\n",
       "     how_included  n_words  \n",
       "0  OVER-THRESHOLD      212  \n",
       "1  OVER-THRESHOLD      224  \n",
       "2  OVER-THRESHOLD      235  \n",
       "3  OVER-THRESHOLD      198  \n",
       "4  OVER-THRESHOLD      268  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#remove first three characters in the day column -the day and the first .\n",
    "spd_rel['date'] = spd_rel['date'].str[3:]\n",
    "#replace . with nothing\n",
    "spd_rel['date'] = [x.replace('.', '') for x in spd_rel['date']]\n",
    "spd_rel.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Deutsches Institut für Menschenrechte auf gese...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Einigung zum Ausweisungsrecht ist Vorschlag mi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Vorwürfe müssen aufgeklärt werden Johannes Kah...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Kürzung der US-Hilfsmittel für Palästina versc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Europa muss handeln: Odyssee auf dem Mittelmee...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content\n",
       "0  012015  Deutsches Institut für Menschenrechte auf gese...\n",
       "1  012016  Einigung zum Ausweisungsrecht ist Vorschlag mi...\n",
       "2  012017  Vorwürfe müssen aufgeklärt werden Johannes Kah...\n",
       "3  012018  Kürzung der US-Hilfsmittel für Palästina versc...\n",
       "4  012019  Europa muss handeln: Odyssee auf dem Mittelmee..."
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#create a content column that includes title and content\n",
    "spd_rel[\"content\"] = spd_rel[\"title\"] +[\" \"]+ spd_rel[\"content\"] \n",
    "#aggregate by date\n",
    "spd_rel=spd_rel.groupby(['date'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "\n",
    "spd_rel.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['012015',\n",
       " '012016',\n",
       " '012017',\n",
       " '012018',\n",
       " '012019',\n",
       " '022015',\n",
       " '022016',\n",
       " '022017',\n",
       " '022019',\n",
       " '022020',\n",
       " '032014',\n",
       " '032015',\n",
       " '032016',\n",
       " '032018',\n",
       " '042015',\n",
       " '042016',\n",
       " '042018',\n",
       " '052015',\n",
       " '052016',\n",
       " '052017',\n",
       " '052018',\n",
       " '052019',\n",
       " '062014',\n",
       " '062015',\n",
       " '062016',\n",
       " '062017',\n",
       " '062018',\n",
       " '062019',\n",
       " '072014',\n",
       " '072015',\n",
       " '072016',\n",
       " '072017',\n",
       " '072018',\n",
       " '072019',\n",
       " '082014',\n",
       " '082015',\n",
       " '082016',\n",
       " '082018',\n",
       " '092014',\n",
       " '092015',\n",
       " '092016',\n",
       " '092017',\n",
       " '092018',\n",
       " '102014',\n",
       " '102015',\n",
       " '102016',\n",
       " '102018',\n",
       " '112014',\n",
       " '112015',\n",
       " '112016',\n",
       " '112017',\n",
       " '122013',\n",
       " '122014',\n",
       " '122015',\n",
       " '122016',\n",
       " '122017',\n",
       " '122018']"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(spd_rel)\n",
    "#check if values look good - yes they do\n",
    "spd_rel['date'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "#put the content through the nlp pipeline\n",
    "spd_rel[\"nlpprocessed\"]=spd_rel['content'].apply(nlp_pipeline)\n",
    "spd_rel_final=spd_rel\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Deutsches Institut für Menschenrechte auf gese...</td>\n",
       "      <td>[deutsches, institut, menschenrechte, gesetzli...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Einigung zum Ausweisungsrecht ist Vorschlag mi...</td>\n",
       "      <td>[einigung, ausweisungsrecht, vorschlag, augenm...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Vorwürfe müssen aufgeklärt werden Johannes Kah...</td>\n",
       "      <td>[vorwürfe, müssen, aufgeklärt, johannes, kahrs...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Kürzung der US-Hilfsmittel für Palästina versc...</td>\n",
       "      <td>[kürzung, palästina, verschärft, situation, na...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Europa muss handeln: Odyssee auf dem Mittelmee...</td>\n",
       "      <td>[europa, handeln, odyssee, mittelmeer, darf, w...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content  \\\n",
       "0  012015  Deutsches Institut für Menschenrechte auf gese...   \n",
       "1  012016  Einigung zum Ausweisungsrecht ist Vorschlag mi...   \n",
       "2  012017  Vorwürfe müssen aufgeklärt werden Johannes Kah...   \n",
       "3  012018  Kürzung der US-Hilfsmittel für Palästina versc...   \n",
       "4  012019  Europa muss handeln: Odyssee auf dem Mittelmee...   \n",
       "\n",
       "                                        nlpprocessed  \n",
       "0  [deutsches, institut, menschenrechte, gesetzli...  \n",
       "1  [einigung, ausweisungsrecht, vorschlag, augenm...  \n",
       "2  [vorwürfe, müssen, aufgeklärt, johannes, kahrs...  \n",
       "3  [kürzung, palästina, verschärft, situation, na...  \n",
       "4  [europa, handeln, odyssee, mittelmeer, darf, w...  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spd_rel_final.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Deutsches Institut für Menschenrechte auf gese...</td>\n",
       "      <td>[deutsches, institut, menschenrechte, gesetzli...</td>\n",
       "      <td>deutsches,institut,menschenrechte,gesetzliche,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Einigung zum Ausweisungsrecht ist Vorschlag mi...</td>\n",
       "      <td>[einigung, ausweisungsrecht, vorschlag, augenm...</td>\n",
       "      <td>einigung,ausweisungsrecht,vorschlag,augenmaß,e...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Vorwürfe müssen aufgeklärt werden Johannes Kah...</td>\n",
       "      <td>[vorwürfe, müssen, aufgeklärt, johannes, kahrs...</td>\n",
       "      <td>vorwürfe,müssen,aufgeklärt,johannes,kahrs,beau...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Kürzung der US-Hilfsmittel für Palästina versc...</td>\n",
       "      <td>[kürzung, palästina, verschärft, situation, na...</td>\n",
       "      <td>kürzung,palästina,verschärft,situation,nahen,o...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Europa muss handeln: Odyssee auf dem Mittelmee...</td>\n",
       "      <td>[europa, handeln, odyssee, mittelmeer, darf, w...</td>\n",
       "      <td>europa,handeln,odyssee,mittelmeer,darf,wiederh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>022015</td>\n",
       "      <td>EU muss Schutz von Menschenleben vor Schutz de...</td>\n",
       "      <td>[eu, schutz, menschenleben, schutz, grenzen, s...</td>\n",
       "      <td>eu,schutz,menschenleben,schutz,grenzen,stellen...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>022016</td>\n",
       "      <td>Finanzierung der humanitären Hilfe langfristig...</td>\n",
       "      <td>[finanzierung, humanitären, hilfe, langfristig...</td>\n",
       "      <td>finanzierung,humanitären,hilfe,langfristig,sic...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>022017</td>\n",
       "      <td>Humanitäre Hilfe im Jemen verstärken Frank Sch...</td>\n",
       "      <td>[humanitäre, hilfe, jemen, verstärken, frank, ...</td>\n",
       "      <td>humanitäre,hilfe,jemen,verstärken,frank,schwab...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>022019</td>\n",
       "      <td>Weibliche Genitalverstümmelung bekämpfen Gabri...</td>\n",
       "      <td>[weibliche, genitalverstümmelung, bekämpfen, g...</td>\n",
       "      <td>weibliche,genitalverstümmelung,bekämpfen,gabri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>022020</td>\n",
       "      <td>Fachkräfte herzlich willkommen Ute Vogt, innen...</td>\n",
       "      <td>[fachkräfte, herzlich, willkommen, ute, vogt, ...</td>\n",
       "      <td>fachkräfte,herzlich,willkommen,ute,vogt,innenp...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>032014</td>\n",
       "      <td>Ohne Angst anders sein – Flagge zeigen gegen R...</td>\n",
       "      <td>[angst, flagge, zeigen, rassismus, menschenfei...</td>\n",
       "      <td>angst,flagge,zeigen,rassismus,menschenfeindlic...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>032015</td>\n",
       "      <td>Wochen gegen Rassismus – 365 Tage Gelegenheit ...</td>\n",
       "      <td>[wochen, rassismus, tage, gelegenheit, zivilco...</td>\n",
       "      <td>wochen,rassismus,tage,gelegenheit,zivilcourage...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>032016</td>\n",
       "      <td>Schutzkonzepte in Asylunterkünften müssen Stan...</td>\n",
       "      <td>[schutzkonzepte, asylunterkünften, müssen, sta...</td>\n",
       "      <td>schutzkonzepte,asylunterkünften,müssen,standar...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>032018</td>\n",
       "      <td>Sozialdemokraten bleiben Hoffnungsträger für Z...</td>\n",
       "      <td>[sozialdemokraten, bleiben, hoffnungsträger, z...</td>\n",
       "      <td>sozialdemokraten,bleiben,hoffnungsträger,zukun...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>042015</td>\n",
       "      <td>Flüchtlingskatatstrophe im Mittelmeer ist Scha...</td>\n",
       "      <td>[flüchtlingskatatstrophe, mittelmeer, schande,...</td>\n",
       "      <td>flüchtlingskatatstrophe,mittelmeer,schande,eur...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>042016</td>\n",
       "      <td>Keine zusätzliche Belastung für Kommunen durch...</td>\n",
       "      <td>[zusätzliche, belastung, kommunen, sozialhilfe...</td>\n",
       "      <td>zusätzliche,belastung,kommunen,sozialhilfeleis...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>042018</td>\n",
       "      <td>Die Gewaltexzesse gegen die Rohingya stoppen  ...</td>\n",
       "      <td>[gewaltexzesse, rohingya, stoppen, frank, schw...</td>\n",
       "      <td>gewaltexzesse,rohingya,stoppen,frank,schwabe,s...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>052015</td>\n",
       "      <td>Das Existenzminimum ist nicht verhandelbar Jos...</td>\n",
       "      <td>[existenzminimum, verhandelbar, josip, juratov...</td>\n",
       "      <td>existenzminimum,verhandelbar,josip,juratovic,i...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>052016</td>\n",
       "      <td>Meilenstein in der Flüchtlingspolitik: Integra...</td>\n",
       "      <td>[meilenstein, flüchtlingspolitik, integrations...</td>\n",
       "      <td>meilenstein,flüchtlingspolitik,integrationsges...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>052017</td>\n",
       "      <td>Aktion Fluchtgedenken: Aus den Augen, nicht au...</td>\n",
       "      <td>[aktion, fluchtgedenken, augen, sinn, norbert,...</td>\n",
       "      <td>aktion,fluchtgedenken,augen,sinn,norbert,spinr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>052018</td>\n",
       "      <td>Verfolgung von Homo- und Transsexuellen weltwe...</td>\n",
       "      <td>[verfolgung, transsexuellen, weltweit, beenden...</td>\n",
       "      <td>verfolgung,transsexuellen,weltweit,beenden,gab...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>052019</td>\n",
       "      <td>Queere Menschen müssen wirksam geschützt werde...</td>\n",
       "      <td>[queere, menschen, müssen, wirksam, geschützt,...</td>\n",
       "      <td>queere,menschen,müssen,wirksam,geschützt,brunn...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>062014</td>\n",
       "      <td>Weltflüchtlingstag 2014 – Gesellschaftliche Ak...</td>\n",
       "      <td>[weltflüchtlingstag, gesellschaftliche, akzept...</td>\n",
       "      <td>weltflüchtlingstag,gesellschaftliche,akzeptanz...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>062015</td>\n",
       "      <td>Für eine humane Flüchtlingspolitik der EU Fran...</td>\n",
       "      <td>[humane, flüchtlingspolitik, eu, frank, schwab...</td>\n",
       "      <td>humane,flüchtlingspolitik,eu,frank,schwabe,men...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>062016</td>\n",
       "      <td>Flüchtlinge brauchen weiterhin unsere Hilfe Fr...</td>\n",
       "      <td>[flüchtlinge, brauchen, weiterhin, hilfe, fran...</td>\n",
       "      <td>flüchtlinge,brauchen,weiterhin,hilfe,frank,sch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>062017</td>\n",
       "      <td>Folter ist ein Angriff auf die Menschenwürde F...</td>\n",
       "      <td>[folter, angriff, menschenwürde, frank, schwab...</td>\n",
       "      <td>folter,angriff,menschenwürde,frank,schwabe,men...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>062018</td>\n",
       "      <td>Ungarn plant Angriff auf die Zivilgesellschaft...</td>\n",
       "      <td>[ungarn, plant, angriff, zivilgesellschaft, fr...</td>\n",
       "      <td>ungarn,plant,angriff,zivilgesellschaft,frank,s...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>062019</td>\n",
       "      <td>Mehr als 70 Millionen Menschen auf der Flucht ...</td>\n",
       "      <td>[mehr, millionen, menschen, flucht, frank, sch...</td>\n",
       "      <td>mehr,millionen,menschen,flucht,frank,schwabe,m...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>072014</td>\n",
       "      <td>Schwerste Verbrechen an irakischen Christen Ke...</td>\n",
       "      <td>[schwerste, verbrechen, irakischen, christen, ...</td>\n",
       "      <td>schwerste,verbrechen,irakischen,christen,kerst...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>072015</td>\n",
       "      <td>Unbegleiteten minderjährigen Flüchtlingen gute...</td>\n",
       "      <td>[unbegleiteten, minderjährigen, flüchtlingen, ...</td>\n",
       "      <td>unbegleiteten,minderjährigen,flüchtlingen,gute...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>072016</td>\n",
       "      <td>Integrationsgesetz: Mit weiteren Verbesserunge...</td>\n",
       "      <td>[integrationsgesetz, weiteren, verbesserungen,...</td>\n",
       "      <td>integrationsgesetz,weiteren,verbesserungen,zie...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>072017</td>\n",
       "      <td>Fluchtursachen bekämpfen – Zukunftsperspektive...</td>\n",
       "      <td>[fluchtursachen, bekämpfen, zukunftsperspektiv...</td>\n",
       "      <td>fluchtursachen,bekämpfen,zukunftsperspektiven,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>072018</td>\n",
       "      <td>NSU: Aufarbeitung noch nicht am Ende Eva Högl,...</td>\n",
       "      <td>[nsu, aufarbeitung, ende, eva, högl, stellvert...</td>\n",
       "      <td>nsu,aufarbeitung,ende,eva,högl,stellvertretend...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>072019</td>\n",
       "      <td>Menschenhandel entschieden bekämpfen Frank Sch...</td>\n",
       "      <td>[menschenhandel, entschieden, bekämpfen, frank...</td>\n",
       "      <td>menschenhandel,entschieden,bekämpfen,frank,sch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>082014</td>\n",
       "      <td>Irakische Jesiden brauchen Hilfe Frank Schwabe...</td>\n",
       "      <td>[irakische, jesiden, brauchen, hilfe, frank, s...</td>\n",
       "      <td>irakische,jesiden,brauchen,hilfe,frank,schwabe...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>082015</td>\n",
       "      <td>Entschlossen gegen Rechtsextremismus Sönke Rix...</td>\n",
       "      <td>[entschlossen, rechtsextremismus, sönke, rix, ...</td>\n",
       "      <td>entschlossen,rechtsextremismus,sönke,rix,sprec...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>082016</td>\n",
       "      <td>BKA-Zahlen zu Straftaten gegen Asylunterkünfte...</td>\n",
       "      <td>[straftaten, asylunterkünfte, erschreckend, ev...</td>\n",
       "      <td>straftaten,asylunterkünfte,erschreckend,eva,hö...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>082018</td>\n",
       "      <td>Spurwechsel: Stichtagsregelung wäre die beste ...</td>\n",
       "      <td>[spurwechsel, stichtagsregelung, wäre, beste, ...</td>\n",
       "      <td>spurwechsel,stichtagsregelung,wäre,beste,lösun...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>092014</td>\n",
       "      <td>Mehr irakische Flüchtlinge aufnehmen Eva Högl,...</td>\n",
       "      <td>[mehr, irakische, flüchtlinge, aufnehmen, eva,...</td>\n",
       "      <td>mehr,irakische,flüchtlinge,aufnehmen,eva,högl,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>092015</td>\n",
       "      <td>Bessere Betreuung von Flüchtlingskindern Sönke...</td>\n",
       "      <td>[bessere, betreuung, flüchtlingskindern, sönke...</td>\n",
       "      <td>bessere,betreuung,flüchtlingskindern,sönke,rix...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>092016</td>\n",
       "      <td>Vorsätzliche Angriffe auf Hilfskonvois sind ei...</td>\n",
       "      <td>[vorsätzliche, angriffe, hilfskonvois, kriegsv...</td>\n",
       "      <td>vorsätzliche,angriffe,hilfskonvois,kriegsverbr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>092017</td>\n",
       "      <td>Flüchtlingskindern eine Chance durch Bildung g...</td>\n",
       "      <td>[flüchtlingskindern, chance, bildung, geben, f...</td>\n",
       "      <td>flüchtlingskindern,chance,bildung,geben,frank,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>092018</td>\n",
       "      <td>Kinder auf der Flucht benötigen unseren Schutz...</td>\n",
       "      <td>[kinder, flucht, benötigen, schutz, frank, sch...</td>\n",
       "      <td>kinder,flucht,benötigen,schutz,frank,schwabe,s...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>102014</td>\n",
       "      <td>Hunger und Armut bis 2030 überwinden  Axel Sch...</td>\n",
       "      <td>[hunger, armut, überwinden, axel, schäfer, ste...</td>\n",
       "      <td>hunger,armut,überwinden,axel,schäfer,stellvert...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>102015</td>\n",
       "      <td>Jetzt müssen wir es machen Axel Schäfer, stell...</td>\n",
       "      <td>[müssen, axel, schäfer, stellvertretender, fra...</td>\n",
       "      <td>müssen,axel,schäfer,stellvertretender,fraktion...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>102016</td>\n",
       "      <td>Friedensdialog in Burundi fördern Gabriela Hei...</td>\n",
       "      <td>[friedensdialog, burundi, fördern, gabriela, h...</td>\n",
       "      <td>friedensdialog,burundi,fördern,gabriela,heinri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>102018</td>\n",
       "      <td>Millionen Menschen von humanitärer Hilfe abges...</td>\n",
       "      <td>[millionen, menschen, humanitärer, hilfe, abge...</td>\n",
       "      <td>millionen,menschen,humanitärer,hilfe,abgeschni...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>112014</td>\n",
       "      <td>Gleichwertigkeit aller Menschen verteidigen Sö...</td>\n",
       "      <td>[gleichwertigkeit, menschen, verteidigen, sönk...</td>\n",
       "      <td>gleichwertigkeit,menschen,verteidigen,sönke,ri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>112015</td>\n",
       "      <td>Aus den Augen der Kinder Sönke Rix, Sprecher d...</td>\n",
       "      <td>[augen, kinder, sönke, rix, sprecher, arbeitsg...</td>\n",
       "      <td>augen,kinder,sönke,rix,sprecher,arbeitsgruppe,...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>112016</td>\n",
       "      <td>Bundeshaushalt 2017: Arbeitsmarkt- und Sozialp...</td>\n",
       "      <td>[bundeshaushalt, sozialpolitik, menschen, ewal...</td>\n",
       "      <td>bundeshaushalt,sozialpolitik,menschen,ewald,sc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>112017</td>\n",
       "      <td>Für eine moderne Migrationspolitik – SPD Bunde...</td>\n",
       "      <td>[moderne, migrationspolitik, spd, bundestagsfr...</td>\n",
       "      <td>moderne,migrationspolitik,spd,bundestagsfrakti...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>122013</td>\n",
       "      <td>Rechte der Flüchtlinge wahren Kerstin Griese, ...</td>\n",
       "      <td>[rechte, flüchtlinge, wahren, kerstin, griese,...</td>\n",
       "      <td>rechte,flüchtlinge,wahren,kerstin,griese,beric...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>122014</td>\n",
       "      <td>Pegida: Gefährlicher Unsinn Burkhard Lischka, ...</td>\n",
       "      <td>[pegida, gefährlicher, unsinn, burkhard, lisch...</td>\n",
       "      <td>pegida,gefährlicher,unsinn,burkhard,lischka,in...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>122015</td>\n",
       "      <td>Für eine menschliche Flüchtlingspolitik Frank ...</td>\n",
       "      <td>[menschliche, flüchtlingspolitik, frank, schwa...</td>\n",
       "      <td>menschliche,flüchtlingspolitik,frank,schwabe,m...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>122016</td>\n",
       "      <td>Erster Bericht des Deutschen Instituts für Men...</td>\n",
       "      <td>[erster, bericht, deutschen, instituts, mensch...</td>\n",
       "      <td>erster,bericht,deutschen,instituts,menschenrec...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>122017</td>\n",
       "      <td>Zum \"Internationalen Tag der Menschenrechte\" d...</td>\n",
       "      <td>[internationalen, tag, menschenrechte, recht, ...</td>\n",
       "      <td>internationalen,tag,menschenrechte,recht,famil...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>122018</td>\n",
       "      <td>Danke an alle Engagierten Sönke Rix, familienp...</td>\n",
       "      <td>[danke, engagierten, sönke, rix, familienpolit...</td>\n",
       "      <td>danke,engagierten,sönke,rix,familienpolitische...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      date                                            content  \\\n",
       "0   012015  Deutsches Institut für Menschenrechte auf gese...   \n",
       "1   012016  Einigung zum Ausweisungsrecht ist Vorschlag mi...   \n",
       "2   012017  Vorwürfe müssen aufgeklärt werden Johannes Kah...   \n",
       "3   012018  Kürzung der US-Hilfsmittel für Palästina versc...   \n",
       "4   012019  Europa muss handeln: Odyssee auf dem Mittelmee...   \n",
       "5   022015  EU muss Schutz von Menschenleben vor Schutz de...   \n",
       "6   022016  Finanzierung der humanitären Hilfe langfristig...   \n",
       "7   022017  Humanitäre Hilfe im Jemen verstärken Frank Sch...   \n",
       "8   022019  Weibliche Genitalverstümmelung bekämpfen Gabri...   \n",
       "9   022020  Fachkräfte herzlich willkommen Ute Vogt, innen...   \n",
       "10  032014  Ohne Angst anders sein – Flagge zeigen gegen R...   \n",
       "11  032015  Wochen gegen Rassismus – 365 Tage Gelegenheit ...   \n",
       "12  032016  Schutzkonzepte in Asylunterkünften müssen Stan...   \n",
       "13  032018  Sozialdemokraten bleiben Hoffnungsträger für Z...   \n",
       "14  042015  Flüchtlingskatatstrophe im Mittelmeer ist Scha...   \n",
       "15  042016  Keine zusätzliche Belastung für Kommunen durch...   \n",
       "16  042018  Die Gewaltexzesse gegen die Rohingya stoppen  ...   \n",
       "17  052015  Das Existenzminimum ist nicht verhandelbar Jos...   \n",
       "18  052016  Meilenstein in der Flüchtlingspolitik: Integra...   \n",
       "19  052017  Aktion Fluchtgedenken: Aus den Augen, nicht au...   \n",
       "20  052018  Verfolgung von Homo- und Transsexuellen weltwe...   \n",
       "21  052019  Queere Menschen müssen wirksam geschützt werde...   \n",
       "22  062014  Weltflüchtlingstag 2014 – Gesellschaftliche Ak...   \n",
       "23  062015  Für eine humane Flüchtlingspolitik der EU Fran...   \n",
       "24  062016  Flüchtlinge brauchen weiterhin unsere Hilfe Fr...   \n",
       "25  062017  Folter ist ein Angriff auf die Menschenwürde F...   \n",
       "26  062018  Ungarn plant Angriff auf die Zivilgesellschaft...   \n",
       "27  062019  Mehr als 70 Millionen Menschen auf der Flucht ...   \n",
       "28  072014  Schwerste Verbrechen an irakischen Christen Ke...   \n",
       "29  072015  Unbegleiteten minderjährigen Flüchtlingen gute...   \n",
       "30  072016  Integrationsgesetz: Mit weiteren Verbesserunge...   \n",
       "31  072017  Fluchtursachen bekämpfen – Zukunftsperspektive...   \n",
       "32  072018  NSU: Aufarbeitung noch nicht am Ende Eva Högl,...   \n",
       "33  072019  Menschenhandel entschieden bekämpfen Frank Sch...   \n",
       "34  082014  Irakische Jesiden brauchen Hilfe Frank Schwabe...   \n",
       "35  082015  Entschlossen gegen Rechtsextremismus Sönke Rix...   \n",
       "36  082016  BKA-Zahlen zu Straftaten gegen Asylunterkünfte...   \n",
       "37  082018  Spurwechsel: Stichtagsregelung wäre die beste ...   \n",
       "38  092014  Mehr irakische Flüchtlinge aufnehmen Eva Högl,...   \n",
       "39  092015  Bessere Betreuung von Flüchtlingskindern Sönke...   \n",
       "40  092016  Vorsätzliche Angriffe auf Hilfskonvois sind ei...   \n",
       "41  092017  Flüchtlingskindern eine Chance durch Bildung g...   \n",
       "42  092018  Kinder auf der Flucht benötigen unseren Schutz...   \n",
       "43  102014  Hunger und Armut bis 2030 überwinden  Axel Sch...   \n",
       "44  102015  Jetzt müssen wir es machen Axel Schäfer, stell...   \n",
       "45  102016  Friedensdialog in Burundi fördern Gabriela Hei...   \n",
       "46  102018  Millionen Menschen von humanitärer Hilfe abges...   \n",
       "47  112014  Gleichwertigkeit aller Menschen verteidigen Sö...   \n",
       "48  112015  Aus den Augen der Kinder Sönke Rix, Sprecher d...   \n",
       "49  112016  Bundeshaushalt 2017: Arbeitsmarkt- und Sozialp...   \n",
       "50  112017  Für eine moderne Migrationspolitik – SPD Bunde...   \n",
       "51  122013  Rechte der Flüchtlinge wahren Kerstin Griese, ...   \n",
       "52  122014  Pegida: Gefährlicher Unsinn Burkhard Lischka, ...   \n",
       "53  122015  Für eine menschliche Flüchtlingspolitik Frank ...   \n",
       "54  122016  Erster Bericht des Deutschen Instituts für Men...   \n",
       "55  122017  Zum \"Internationalen Tag der Menschenrechte\" d...   \n",
       "56  122018  Danke an alle Engagierten Sönke Rix, familienp...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [deutsches, institut, menschenrechte, gesetzli...   \n",
       "1   [einigung, ausweisungsrecht, vorschlag, augenm...   \n",
       "2   [vorwürfe, müssen, aufgeklärt, johannes, kahrs...   \n",
       "3   [kürzung, palästina, verschärft, situation, na...   \n",
       "4   [europa, handeln, odyssee, mittelmeer, darf, w...   \n",
       "5   [eu, schutz, menschenleben, schutz, grenzen, s...   \n",
       "6   [finanzierung, humanitären, hilfe, langfristig...   \n",
       "7   [humanitäre, hilfe, jemen, verstärken, frank, ...   \n",
       "8   [weibliche, genitalverstümmelung, bekämpfen, g...   \n",
       "9   [fachkräfte, herzlich, willkommen, ute, vogt, ...   \n",
       "10  [angst, flagge, zeigen, rassismus, menschenfei...   \n",
       "11  [wochen, rassismus, tage, gelegenheit, zivilco...   \n",
       "12  [schutzkonzepte, asylunterkünften, müssen, sta...   \n",
       "13  [sozialdemokraten, bleiben, hoffnungsträger, z...   \n",
       "14  [flüchtlingskatatstrophe, mittelmeer, schande,...   \n",
       "15  [zusätzliche, belastung, kommunen, sozialhilfe...   \n",
       "16  [gewaltexzesse, rohingya, stoppen, frank, schw...   \n",
       "17  [existenzminimum, verhandelbar, josip, juratov...   \n",
       "18  [meilenstein, flüchtlingspolitik, integrations...   \n",
       "19  [aktion, fluchtgedenken, augen, sinn, norbert,...   \n",
       "20  [verfolgung, transsexuellen, weltweit, beenden...   \n",
       "21  [queere, menschen, müssen, wirksam, geschützt,...   \n",
       "22  [weltflüchtlingstag, gesellschaftliche, akzept...   \n",
       "23  [humane, flüchtlingspolitik, eu, frank, schwab...   \n",
       "24  [flüchtlinge, brauchen, weiterhin, hilfe, fran...   \n",
       "25  [folter, angriff, menschenwürde, frank, schwab...   \n",
       "26  [ungarn, plant, angriff, zivilgesellschaft, fr...   \n",
       "27  [mehr, millionen, menschen, flucht, frank, sch...   \n",
       "28  [schwerste, verbrechen, irakischen, christen, ...   \n",
       "29  [unbegleiteten, minderjährigen, flüchtlingen, ...   \n",
       "30  [integrationsgesetz, weiteren, verbesserungen,...   \n",
       "31  [fluchtursachen, bekämpfen, zukunftsperspektiv...   \n",
       "32  [nsu, aufarbeitung, ende, eva, högl, stellvert...   \n",
       "33  [menschenhandel, entschieden, bekämpfen, frank...   \n",
       "34  [irakische, jesiden, brauchen, hilfe, frank, s...   \n",
       "35  [entschlossen, rechtsextremismus, sönke, rix, ...   \n",
       "36  [straftaten, asylunterkünfte, erschreckend, ev...   \n",
       "37  [spurwechsel, stichtagsregelung, wäre, beste, ...   \n",
       "38  [mehr, irakische, flüchtlinge, aufnehmen, eva,...   \n",
       "39  [bessere, betreuung, flüchtlingskindern, sönke...   \n",
       "40  [vorsätzliche, angriffe, hilfskonvois, kriegsv...   \n",
       "41  [flüchtlingskindern, chance, bildung, geben, f...   \n",
       "42  [kinder, flucht, benötigen, schutz, frank, sch...   \n",
       "43  [hunger, armut, überwinden, axel, schäfer, ste...   \n",
       "44  [müssen, axel, schäfer, stellvertretender, fra...   \n",
       "45  [friedensdialog, burundi, fördern, gabriela, h...   \n",
       "46  [millionen, menschen, humanitärer, hilfe, abge...   \n",
       "47  [gleichwertigkeit, menschen, verteidigen, sönk...   \n",
       "48  [augen, kinder, sönke, rix, sprecher, arbeitsg...   \n",
       "49  [bundeshaushalt, sozialpolitik, menschen, ewal...   \n",
       "50  [moderne, migrationspolitik, spd, bundestagsfr...   \n",
       "51  [rechte, flüchtlinge, wahren, kerstin, griese,...   \n",
       "52  [pegida, gefährlicher, unsinn, burkhard, lisch...   \n",
       "53  [menschliche, flüchtlingspolitik, frank, schwa...   \n",
       "54  [erster, bericht, deutschen, instituts, mensch...   \n",
       "55  [internationalen, tag, menschenrechte, recht, ...   \n",
       "56  [danke, engagierten, sönke, rix, familienpolit...   \n",
       "\n",
       "                                           liststring  \n",
       "0   deutsches,institut,menschenrechte,gesetzliche,...  \n",
       "1   einigung,ausweisungsrecht,vorschlag,augenmaß,e...  \n",
       "2   vorwürfe,müssen,aufgeklärt,johannes,kahrs,beau...  \n",
       "3   kürzung,palästina,verschärft,situation,nahen,o...  \n",
       "4   europa,handeln,odyssee,mittelmeer,darf,wiederh...  \n",
       "5   eu,schutz,menschenleben,schutz,grenzen,stellen...  \n",
       "6   finanzierung,humanitären,hilfe,langfristig,sic...  \n",
       "7   humanitäre,hilfe,jemen,verstärken,frank,schwab...  \n",
       "8   weibliche,genitalverstümmelung,bekämpfen,gabri...  \n",
       "9   fachkräfte,herzlich,willkommen,ute,vogt,innenp...  \n",
       "10  angst,flagge,zeigen,rassismus,menschenfeindlic...  \n",
       "11  wochen,rassismus,tage,gelegenheit,zivilcourage...  \n",
       "12  schutzkonzepte,asylunterkünften,müssen,standar...  \n",
       "13  sozialdemokraten,bleiben,hoffnungsträger,zukun...  \n",
       "14  flüchtlingskatatstrophe,mittelmeer,schande,eur...  \n",
       "15  zusätzliche,belastung,kommunen,sozialhilfeleis...  \n",
       "16  gewaltexzesse,rohingya,stoppen,frank,schwabe,s...  \n",
       "17  existenzminimum,verhandelbar,josip,juratovic,i...  \n",
       "18  meilenstein,flüchtlingspolitik,integrationsges...  \n",
       "19  aktion,fluchtgedenken,augen,sinn,norbert,spinr...  \n",
       "20  verfolgung,transsexuellen,weltweit,beenden,gab...  \n",
       "21  queere,menschen,müssen,wirksam,geschützt,brunn...  \n",
       "22  weltflüchtlingstag,gesellschaftliche,akzeptanz...  \n",
       "23  humane,flüchtlingspolitik,eu,frank,schwabe,men...  \n",
       "24  flüchtlinge,brauchen,weiterhin,hilfe,frank,sch...  \n",
       "25  folter,angriff,menschenwürde,frank,schwabe,men...  \n",
       "26  ungarn,plant,angriff,zivilgesellschaft,frank,s...  \n",
       "27  mehr,millionen,menschen,flucht,frank,schwabe,m...  \n",
       "28  schwerste,verbrechen,irakischen,christen,kerst...  \n",
       "29  unbegleiteten,minderjährigen,flüchtlingen,gute...  \n",
       "30  integrationsgesetz,weiteren,verbesserungen,zie...  \n",
       "31  fluchtursachen,bekämpfen,zukunftsperspektiven,...  \n",
       "32  nsu,aufarbeitung,ende,eva,högl,stellvertretend...  \n",
       "33  menschenhandel,entschieden,bekämpfen,frank,sch...  \n",
       "34  irakische,jesiden,brauchen,hilfe,frank,schwabe...  \n",
       "35  entschlossen,rechtsextremismus,sönke,rix,sprec...  \n",
       "36  straftaten,asylunterkünfte,erschreckend,eva,hö...  \n",
       "37  spurwechsel,stichtagsregelung,wäre,beste,lösun...  \n",
       "38  mehr,irakische,flüchtlinge,aufnehmen,eva,högl,...  \n",
       "39  bessere,betreuung,flüchtlingskindern,sönke,rix...  \n",
       "40  vorsätzliche,angriffe,hilfskonvois,kriegsverbr...  \n",
       "41  flüchtlingskindern,chance,bildung,geben,frank,...  \n",
       "42  kinder,flucht,benötigen,schutz,frank,schwabe,s...  \n",
       "43  hunger,armut,überwinden,axel,schäfer,stellvert...  \n",
       "44  müssen,axel,schäfer,stellvertretender,fraktion...  \n",
       "45  friedensdialog,burundi,fördern,gabriela,heinri...  \n",
       "46  millionen,menschen,humanitärer,hilfe,abgeschni...  \n",
       "47  gleichwertigkeit,menschen,verteidigen,sönke,ri...  \n",
       "48  augen,kinder,sönke,rix,sprecher,arbeitsgruppe,...  \n",
       "49  bundeshaushalt,sozialpolitik,menschen,ewald,sc...  \n",
       "50  moderne,migrationspolitik,spd,bundestagsfrakti...  \n",
       "51  rechte,flüchtlinge,wahren,kerstin,griese,beric...  \n",
       "52  pegida,gefährlicher,unsinn,burkhard,lischka,in...  \n",
       "53  menschliche,flüchtlingspolitik,frank,schwabe,m...  \n",
       "54  erster,bericht,deutschen,instituts,menschenrec...  \n",
       "55  internationalen,tag,menschenrechte,recht,famil...  \n",
       "56  danke,engagierten,sönke,rix,familienpolitische...  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#convert nlpprocessed column to string\n",
    "spd_rel_final['liststring'] = [','.join(map(str, l)) for l in spd_rel_final['nlpprocessed']]\n",
    "spd_rel_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Deutsches Institut für Menschenrechte auf gese...</td>\n",
       "      <td>[deutsches, institut, menschenrechte, gesetzli...</td>\n",
       "      <td>deutsches,institut,menschenrechte,gesetzliche,...</td>\n",
       "      <td>123</td>\n",
       "      <td>71</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Einigung zum Ausweisungsrecht ist Vorschlag mi...</td>\n",
       "      <td>[einigung, ausweisungsrecht, vorschlag, augenm...</td>\n",
       "      <td>einigung,ausweisungsrecht,vorschlag,augenmaß,e...</td>\n",
       "      <td>94</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Vorwürfe müssen aufgeklärt werden Johannes Kah...</td>\n",
       "      <td>[vorwürfe, müssen, aufgeklärt, johannes, kahrs...</td>\n",
       "      <td>vorwürfe,müssen,aufgeklärt,johannes,kahrs,beau...</td>\n",
       "      <td>59</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Kürzung der US-Hilfsmittel für Palästina versc...</td>\n",
       "      <td>[kürzung, palästina, verschärft, situation, na...</td>\n",
       "      <td>kürzung,palästina,verschärft,situation,nahen,o...</td>\n",
       "      <td>24</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Europa muss handeln: Odyssee auf dem Mittelmee...</td>\n",
       "      <td>[europa, handeln, odyssee, mittelmeer, darf, w...</td>\n",
       "      <td>europa,handeln,odyssee,mittelmeer,darf,wiederh...</td>\n",
       "      <td>25</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content  \\\n",
       "0  012015  Deutsches Institut für Menschenrechte auf gese...   \n",
       "1  012016  Einigung zum Ausweisungsrecht ist Vorschlag mi...   \n",
       "2  012017  Vorwürfe müssen aufgeklärt werden Johannes Kah...   \n",
       "3  012018  Kürzung der US-Hilfsmittel für Palästina versc...   \n",
       "4  012019  Europa muss handeln: Odyssee auf dem Mittelmee...   \n",
       "\n",
       "                                        nlpprocessed  \\\n",
       "0  [deutsches, institut, menschenrechte, gesetzli...   \n",
       "1  [einigung, ausweisungsrecht, vorschlag, augenm...   \n",
       "2  [vorwürfe, müssen, aufgeklärt, johannes, kahrs...   \n",
       "3  [kürzung, palästina, verschärft, situation, na...   \n",
       "4  [europa, handeln, odyssee, mittelmeer, darf, w...   \n",
       "\n",
       "                                          liststring  positive words  \\\n",
       "0  deutsches,institut,menschenrechte,gesetzliche,...             123   \n",
       "1  einigung,ausweisungsrecht,vorschlag,augenmaß,e...              94   \n",
       "2  vorwürfe,müssen,aufgeklärt,johannes,kahrs,beau...              59   \n",
       "3  kürzung,palästina,verschärft,situation,nahen,o...              24   \n",
       "4  europa,handeln,odyssee,mittelmeer,darf,wiederh...              25   \n",
       "\n",
       "   negative words  \n",
       "0              71  \n",
       "1              33  \n",
       "2              29  \n",
       "3              10  \n",
       "4              13  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spd_rel_final['positive words'] = spd_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "spd_rel_final['negative words'] = spd_rel_final['liststring'].str.count('|'.join(negative_list1))\n",
    "spd_rel_final.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label every row by comparing its lexicon hit counts:\n",
    "#   more positive than negative hits -> 'positive'\n",
    "#   more negative than positive hits -> 'negative'\n",
    "#   equal counts                     -> 'neutral'\n",
    "conditions = [\n",
    "    spd_rel_final['positive words'] > spd_rel_final['negative words'],\n",
    "    spd_rel_final['negative words'] > spd_rel_final['positive words'],\n",
    "    spd_rel_final['negative words'] == spd_rel_final['positive words'],\n",
    "]\n",
    "choices = ['positive', 'negative', 'neutral']\n",
    "\n",
    "# default='' only applies to rows for which none of the conditions is True.\n",
    "spd_rel_final['overall'] = np.select(conditions, choices, default='')\n",
    "\n",
    "# Preview the result instead of rendering the whole frame as cell output.\n",
    "spd_rel_final.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clean data: drop the token-list column, give the remaining columns their\n",
    "# export names, and write the result to CSV.\n",
    "# drop(..., errors='ignore') skips a column that is already gone and rename()\n",
    "# skips keys that are not present, so this cell can be re-run without a KeyError.\n",
    "spd_rel_final = (\n",
    "    spd_rel_final\n",
    "    .drop(columns=['nlpprocessed'], errors='ignore')\n",
    "    .rename(columns={\n",
    "        'content': 'original_title_text',\n",
    "        'liststring': 'text_procssed_text',\n",
    "        'date': 'month_year',\n",
    "        'overall': 'overall_sentiment',\n",
    "    })\n",
    ")\n",
    "# NOTE: hard-coded local path -- change it to a location on your machine.\n",
    "spd_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/spd_sentiment.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CDU - load refugee-relevant data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Gewalt gegen asylsuchende Christen in Deutschl...</td>\n",
       "      <td>12.08.2014</td>\n",
       "      <td>Grundrecht auf Religionsfreiheit konsequent ...</td>\n",
       "      <td>[0.59266729]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Verbot von Kinderehen zügig umsetzen</td>\n",
       "      <td>04.04.2017</td>\n",
       "      <td>Union begrüßt Kabinettsbeschluss und Start d...</td>\n",
       "      <td>[0.56154596]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gewalt ist niemals akzeptabel</td>\n",
       "      <td>21.08.2015</td>\n",
       "      <td>Schwere Krawalle in Flüchtlingsheim in Suhl ...</td>\n",
       "      <td>[0.55196318]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Keine rechtsfreien Räume in unseren Städten</td>\n",
       "      <td>19.02.2016</td>\n",
       "      <td>Der Staat ist handlungsfähig  Die Ereignisse...</td>\n",
       "      <td>[0.54658599]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Vorschlag Junckers zur Flüchtlingspolitik geht...</td>\n",
       "      <td>09.09.2015</td>\n",
       "      <td>Europa muss Flüchtlingen helfen und zugleich...</td>\n",
       "      <td>[0.5342253]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>193</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title        date  \\\n",
       "0  Gewalt gegen asylsuchende Christen in Deutschl...  12.08.2014   \n",
       "1               Verbot von Kinderehen zügig umsetzen  04.04.2017   \n",
       "2                      Gewalt ist niemals akzeptabel  21.08.2015   \n",
       "3        Keine rechtsfreien Räume in unseren Städten  19.02.2016   \n",
       "4  Vorschlag Junckers zur Flüchtlingspolitik geht...  09.09.2015   \n",
       "\n",
       "                                             content         score  \\\n",
       "0    Grundrecht auf Religionsfreiheit konsequent ...  [0.59266729]   \n",
       "1    Union begrüßt Kabinettsbeschluss und Start d...  [0.56154596]   \n",
       "2    Schwere Krawalle in Flüchtlingsheim in Suhl ...  [0.55196318]   \n",
       "3    Der Staat ist handlungsfähig  Die Ereignisse...  [0.54658599]   \n",
       "4    Europa muss Flüchtlingen helfen und zugleich...   [0.5342253]   \n",
       "\n",
       "     how_included  n_words  \n",
       "0  OVER-THRESHOLD      229  \n",
       "1  OVER-THRESHOLD      303  \n",
       "2  OVER-THRESHOLD      206  \n",
       "3  OVER-THRESHOLD      402  \n",
       "4  OVER-THRESHOLD      193  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdu_rel = pd.read_csv(\"/Users/ashrakatelshehawy/cdu_refugeerelevant.csv\",header=None, encoding='utf-8',delimiter='\\t',error_bad_lines=False)\n",
    "cdu_rel.columns = ['title', 'date',\"content\",\"score\",\"how_included\",\"n_words\"]\n",
    "\n",
    "cdu_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>Humanitäre Hilfe für Syrien und Nachbarländer ...</td>\n",
       "      <td>14.03.2019</td>\n",
       "      <td>Deutschland kommt seiner Verantwortung nach ...</td>\n",
       "      <td>[0.46223735]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>241</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>190</th>\n",
       "      <td>Schlepper müssen bekämpft werden</td>\n",
       "      <td>05.09.2015</td>\n",
       "      <td>Bundestagsmandat zum bewaffneten Einsatz ben...</td>\n",
       "      <td>[0.40283407]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>309</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>315</th>\n",
       "      <td>Volles Programm bis zur Sommerpause</td>\n",
       "      <td>03.06.2016</td>\n",
       "      <td>Volker Kauder im Kommentar der Woche  \"Bis z...</td>\n",
       "      <td>[0.38526323]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>316</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>309</th>\n",
       "      <td>Flüchtlingskinder brauchen kindgerechte Flücht...</td>\n",
       "      <td>09.04.2015</td>\n",
       "      <td>Spielgeräte und Spielausstattung aus dem 100...</td>\n",
       "      <td>[0.38575226]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231</th>\n",
       "      <td>Flüchtlingszahlen reduzieren und begrenzen</td>\n",
       "      <td>26.11.2015</td>\n",
       "      <td>Eine Grundgesetzänderung wird es nicht geben E...</td>\n",
       "      <td>[0.39702787]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Nicht nur Deutschland trägt Verantwortung in d...</td>\n",
       "      <td>10.09.2014</td>\n",
       "      <td>Ungleiche Verteilung der Asylbewerber erford...</td>\n",
       "      <td>[0.52878829]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>317</th>\n",
       "      <td>Mehr Effizienz bei Asylverfahren</td>\n",
       "      <td>14.01.2016</td>\n",
       "      <td>Ankunftsnachweis zur Identifizierung Der Bunde...</td>\n",
       "      <td>[0.38522478]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>413</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>\"Wir haben klare Regeln für das Miteinander\"</td>\n",
       "      <td>21.10.2015</td>\n",
       "      <td>Ein generelles Kippen der Stimmung kann ich ni...</td>\n",
       "      <td>[0.45017325]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228</th>\n",
       "      <td>Asylpaket II kommt jetzt in den Bundestag</td>\n",
       "      <td>11.02.2016</td>\n",
       "      <td>Bisheriger Gesetzentwurf bleibt unverändert De...</td>\n",
       "      <td>[0.39735552]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>Wir wollen die europäische Außengrenze sichern</td>\n",
       "      <td>01.02.2016</td>\n",
       "      <td>Polizei besser aufstellen Die Einigung auf das...</td>\n",
       "      <td>[0.39765634]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1648</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>„Die öffentliche Sicherheit muss gewährleistet...</td>\n",
       "      <td>17.09.2015</td>\n",
       "      <td>Grundsätze und Werte akzeptieren Der Unionsfra...</td>\n",
       "      <td>[0.44749145]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>679</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>179</th>\n",
       "      <td>Die Länder müssen ihre Hausaufgaben machen“\\t0...</td>\n",
       "      <td>19.11.2015</td>\n",
       "      <td>Familiennachzug hat keine Priorität Unionsfrak...</td>\n",
       "      <td>[0.40592977]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1156</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>Wir müssen Zuwanderer qualifizieren</td>\n",
       "      <td>01.12.2015</td>\n",
       "      <td>Arbeitslosenzahlen auf Rekordtief  Die Arbei...</td>\n",
       "      <td>[0.42636529]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>252</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224</th>\n",
       "      <td>Fordern und Fördern</td>\n",
       "      <td>17.04.2019</td>\n",
       "      <td>Mehr Unterstützung von Migranten bei Arbeits...</td>\n",
       "      <td>[0.39800225]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>292</th>\n",
       "      <td>Das sind wirkliche Kriegsflüchtlinge</td>\n",
       "      <td>10.02.2016</td>\n",
       "      <td>Das sind wirkliche Kriegsflüchtlinge Hundertta...</td>\n",
       "      <td>[0.3872143]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>Neue Regierung in Nigeria muss Kinder wirksam ...</td>\n",
       "      <td>13.04.2015</td>\n",
       "      <td>Laut UNICEF-Bericht 800.000 Kinder auf der F...</td>\n",
       "      <td>[0.46289603]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>316</th>\n",
       "      <td>SPD-regierte Länder betreiben falsche Asylpoli...</td>\n",
       "      <td>13.03.2017</td>\n",
       "      <td>Blockadepolitik der SPD-regierten Länder sende...</td>\n",
       "      <td>[0.38523659]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1704</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>Weitere Schritte zur Bewältigung der Flüchtlin...</td>\n",
       "      <td>06.11.2015</td>\n",
       "      <td>Union ist sich einig   Die Flüchtlingsbewegu...</td>\n",
       "      <td>[0.39899732]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>342</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>Der Einsatz in Syrien ist notwendig</td>\n",
       "      <td>04.12.2015</td>\n",
       "      <td>Volker Kauder im Kommentar der Woche  In die...</td>\n",
       "      <td>[0.45226997]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>Blick auf diejenigen richten, die sich schwer tun</td>\n",
       "      <td>31.01.2017</td>\n",
       "      <td>Arbeitslosenzahlen steigen an  Im Januar ist...</td>\n",
       "      <td>[0.38964419]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>265</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>Die Loyalität türkischstämmiger Bürger muss in...</td>\n",
       "      <td>07.08.2016</td>\n",
       "      <td>Islamverbände müssen Integration fördern Seit ...</td>\n",
       "      <td>[0.41802076]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>103</th>\n",
       "      <td>Wer sich anstrengt, soll belohnt werden</td>\n",
       "      <td>07.07.2016</td>\n",
       "      <td>Bundestag verabschiedet Integrationsgesetz  ...</td>\n",
       "      <td>[0.4345264]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>Altersfeststellung unbegleiteter minderjährige...</td>\n",
       "      <td>19.01.2018</td>\n",
       "      <td>Heute hat der Deutsche Bundestag in einer leb...</td>\n",
       "      <td>[0.42326818]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>175</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>Wir brauchen ein vereinheitlichtes europäische...</td>\n",
       "      <td>27.09.2015</td>\n",
       "      <td>Wir brauchen ein Recht, das gilt Im Gespräch m...</td>\n",
       "      <td>[0.46552779]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>194</th>\n",
       "      <td>Putins Syrien-Politik ist zynisch</td>\n",
       "      <td>23.11.2017</td>\n",
       "      <td>Nicht Sotschi, sondern Genf ist der richtige...</td>\n",
       "      <td>[0.40207028]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>283</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 title        date  \\\n",
       "41   Humanitäre Hilfe für Syrien und Nachbarländer ...  14.03.2019   \n",
       "190                   Schlepper müssen bekämpft werden  05.09.2015   \n",
       "315                Volles Programm bis zur Sommerpause  03.06.2016   \n",
       "309  Flüchtlingskinder brauchen kindgerechte Flücht...  09.04.2015   \n",
       "231         Flüchtlingszahlen reduzieren und begrenzen  26.11.2015   \n",
       "7    Nicht nur Deutschland trägt Verantwortung in d...  10.09.2014   \n",
       "317                   Mehr Effizienz bei Asylverfahren  14.01.2016   \n",
       "68        \"Wir haben klare Regeln für das Miteinander\"  21.10.2015   \n",
       "228          Asylpaket II kommt jetzt in den Bundestag  11.02.2016   \n",
       "227     Wir wollen die europäische Außengrenze sichern  01.02.2016   \n",
       "73   „Die öffentliche Sicherheit muss gewährleistet...  17.09.2015   \n",
       "179  Die Länder müssen ihre Hausaufgaben machen“\\t0...  19.11.2015   \n",
       "120                Wir müssen Zuwanderer qualifizieren  01.12.2015   \n",
       "224                               Fordern und Fördern   17.04.2019   \n",
       "292               Das sind wirkliche Kriegsflüchtlinge  10.02.2016   \n",
       "39   Neue Regierung in Nigeria muss Kinder wirksam ...  13.04.2015   \n",
       "316  SPD-regierte Länder betreiben falsche Asylpoli...  13.03.2017   \n",
       "211  Weitere Schritte zur Bewältigung der Flüchtlin...  06.11.2015   \n",
       "63                 Der Einsatz in Syrien ist notwendig  04.12.2015   \n",
       "274  Blick auf diejenigen richten, die sich schwer tun  31.01.2017   \n",
       "146  Die Loyalität türkischstämmiger Bürger muss in...  07.08.2016   \n",
       "103            Wer sich anstrengt, soll belohnt werden  07.07.2016   \n",
       "128  Altersfeststellung unbegleiteter minderjährige...  19.01.2018   \n",
       "36   Wir brauchen ein vereinheitlichtes europäische...  27.09.2015   \n",
       "194                  Putins Syrien-Politik ist zynisch  23.11.2017   \n",
       "\n",
       "                                               content         score  \\\n",
       "41     Deutschland kommt seiner Verantwortung nach ...  [0.46223735]   \n",
       "190    Bundestagsmandat zum bewaffneten Einsatz ben...  [0.40283407]   \n",
       "315    Volker Kauder im Kommentar der Woche  \"Bis z...  [0.38526323]   \n",
       "309    Spielgeräte und Spielausstattung aus dem 100...  [0.38575226]   \n",
       "231  Eine Grundgesetzänderung wird es nicht geben E...  [0.39702787]   \n",
       "7      Ungleiche Verteilung der Asylbewerber erford...  [0.52878829]   \n",
       "317  Ankunftsnachweis zur Identifizierung Der Bunde...  [0.38522478]   \n",
       "68   Ein generelles Kippen der Stimmung kann ich ni...  [0.45017325]   \n",
       "228  Bisheriger Gesetzentwurf bleibt unverändert De...  [0.39735552]   \n",
       "227  Polizei besser aufstellen Die Einigung auf das...  [0.39765634]   \n",
       "73   Grundsätze und Werte akzeptieren Der Unionsfra...  [0.44749145]   \n",
       "179  Familiennachzug hat keine Priorität Unionsfrak...  [0.40592977]   \n",
       "120    Arbeitslosenzahlen auf Rekordtief  Die Arbei...  [0.42636529]   \n",
       "224    Mehr Unterstützung von Migranten bei Arbeits...  [0.39800225]   \n",
       "292  Das sind wirkliche Kriegsflüchtlinge Hundertta...   [0.3872143]   \n",
       "39     Laut UNICEF-Bericht 800.000 Kinder auf der F...  [0.46289603]   \n",
       "316  Blockadepolitik der SPD-regierten Länder sende...  [0.38523659]   \n",
       "211    Union ist sich einig   Die Flüchtlingsbewegu...  [0.39899732]   \n",
       "63     Volker Kauder im Kommentar der Woche  In die...  [0.45226997]   \n",
       "274    Arbeitslosenzahlen steigen an  Im Januar ist...  [0.38964419]   \n",
       "146  Islamverbände müssen Integration fördern Seit ...  [0.41802076]   \n",
       "103    Bundestag verabschiedet Integrationsgesetz  ...   [0.4345264]   \n",
       "128   Heute hat der Deutsche Bundestag in einer leb...  [0.42326818]   \n",
       "36   Wir brauchen ein Recht, das gilt Im Gespräch m...  [0.46552779]   \n",
       "194    Nicht Sotschi, sondern Genf ist der richtige...  [0.40207028]   \n",
       "\n",
       "       how_included  n_words  \n",
       "41   OVER-THRESHOLD      241  \n",
       "190  OVER-THRESHOLD      309  \n",
       "315  OVER-THRESHOLD      316  \n",
       "309  OVER-THRESHOLD      230  \n",
       "231  OVER-THRESHOLD     1317  \n",
       "7    OVER-THRESHOLD      288  \n",
       "317  OVER-THRESHOLD      413  \n",
       "68   OVER-THRESHOLD      900  \n",
       "228  OVER-THRESHOLD      624  \n",
       "227  OVER-THRESHOLD     1648  \n",
       "73   OVER-THRESHOLD      679  \n",
       "179  OVER-THRESHOLD     1156  \n",
       "120  OVER-THRESHOLD      252  \n",
       "224  OVER-THRESHOLD      190  \n",
       "292  OVER-THRESHOLD     1582  \n",
       "39   OVER-THRESHOLD      255  \n",
       "316  OVER-THRESHOLD     1704  \n",
       "211  OVER-THRESHOLD      342  \n",
       "63   OVER-THRESHOLD      370  \n",
       "274  OVER-THRESHOLD      265  \n",
       "146  OVER-THRESHOLD      696  \n",
       "103  OVER-THRESHOLD      229  \n",
       "128  OVER-THRESHOLD      175  \n",
       "36   OVER-THRESHOLD     1024  \n",
       "194  OVER-THRESHOLD      283  "
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdu_rel_sample=cdu_rel.sample(25)\n",
    "cdu_rel_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Gewalt gegen asylsuchende Christen in Deutschl...</td>\n",
       "      <td>082014</td>\n",
       "      <td>Grundrecht auf Religionsfreiheit konsequent ...</td>\n",
       "      <td>[0.59266729]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Verbot von Kinderehen zügig umsetzen</td>\n",
       "      <td>042017</td>\n",
       "      <td>Union begrüßt Kabinettsbeschluss und Start d...</td>\n",
       "      <td>[0.56154596]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gewalt ist niemals akzeptabel</td>\n",
       "      <td>082015</td>\n",
       "      <td>Schwere Krawalle in Flüchtlingsheim in Suhl ...</td>\n",
       "      <td>[0.55196318]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Keine rechtsfreien Räume in unseren Städten</td>\n",
       "      <td>022016</td>\n",
       "      <td>Der Staat ist handlungsfähig  Die Ereignisse...</td>\n",
       "      <td>[0.54658599]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Vorschlag Junckers zur Flüchtlingspolitik geht...</td>\n",
       "      <td>092015</td>\n",
       "      <td>Europa muss Flüchtlingen helfen und zugleich...</td>\n",
       "      <td>[0.5342253]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>193</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title    date  \\\n",
       "0  Gewalt gegen asylsuchende Christen in Deutschl...  082014   \n",
       "1               Verbot von Kinderehen zügig umsetzen  042017   \n",
       "2                      Gewalt ist niemals akzeptabel  082015   \n",
       "3        Keine rechtsfreien Räume in unseren Städten  022016   \n",
       "4  Vorschlag Junckers zur Flüchtlingspolitik geht...  092015   \n",
       "\n",
       "                                             content         score  \\\n",
       "0    Grundrecht auf Religionsfreiheit konsequent ...  [0.59266729]   \n",
       "1    Union begrüßt Kabinettsbeschluss und Start d...  [0.56154596]   \n",
       "2    Schwere Krawalle in Flüchtlingsheim in Suhl ...  [0.55196318]   \n",
       "3    Der Staat ist handlungsfähig  Die Ereignisse...  [0.54658599]   \n",
       "4    Europa muss Flüchtlingen helfen und zugleich...   [0.5342253]   \n",
       "\n",
       "     how_included  n_words  \n",
       "0  OVER-THRESHOLD      229  \n",
       "1  OVER-THRESHOLD      303  \n",
       "2  OVER-THRESHOLD      206  \n",
       "3  OVER-THRESHOLD      402  \n",
       "4  OVER-THRESHOLD      193  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdu_rel['date'] = cdu_rel['date'].str[3:]\n",
    "cdu_rel['date'] = [x.replace('.', '') for x in cdu_rel['date']]\n",
    "cdu_rel.head()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Gewalt gegen asylsuchende Christen in Deutschl...</td>\n",
       "      <td>082014</td>\n",
       "      <td>Gewalt gegen asylsuchende Christen in Deutschl...</td>\n",
       "      <td>[0.59266729]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Verbot von Kinderehen zügig umsetzen</td>\n",
       "      <td>042017</td>\n",
       "      <td>Verbot von Kinderehen zügig umsetzen   Union b...</td>\n",
       "      <td>[0.56154596]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Gewalt ist niemals akzeptabel</td>\n",
       "      <td>082015</td>\n",
       "      <td>Gewalt ist niemals akzeptabel   Schwere Krawal...</td>\n",
       "      <td>[0.55196318]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Keine rechtsfreien Räume in unseren Städten</td>\n",
       "      <td>022016</td>\n",
       "      <td>Keine rechtsfreien Räume in unseren Städten   ...</td>\n",
       "      <td>[0.54658599]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Vorschlag Junckers zur Flüchtlingspolitik geht...</td>\n",
       "      <td>092015</td>\n",
       "      <td>Vorschlag Junckers zur Flüchtlingspolitik geht...</td>\n",
       "      <td>[0.5342253]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>193</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title    date  \\\n",
       "0  Gewalt gegen asylsuchende Christen in Deutschl...  082014   \n",
       "1               Verbot von Kinderehen zügig umsetzen  042017   \n",
       "2                      Gewalt ist niemals akzeptabel  082015   \n",
       "3        Keine rechtsfreien Räume in unseren Städten  022016   \n",
       "4  Vorschlag Junckers zur Flüchtlingspolitik geht...  092015   \n",
       "\n",
       "                                             content         score  \\\n",
       "0  Gewalt gegen asylsuchende Christen in Deutschl...  [0.59266729]   \n",
       "1  Verbot von Kinderehen zügig umsetzen   Union b...  [0.56154596]   \n",
       "2  Gewalt ist niemals akzeptabel   Schwere Krawal...  [0.55196318]   \n",
       "3  Keine rechtsfreien Räume in unseren Städten   ...  [0.54658599]   \n",
       "4  Vorschlag Junckers zur Flüchtlingspolitik geht...   [0.5342253]   \n",
       "\n",
       "     how_included  n_words  \n",
       "0  OVER-THRESHOLD      229  \n",
       "1  OVER-THRESHOLD      303  \n",
       "2  OVER-THRESHOLD      206  \n",
       "3  OVER-THRESHOLD      402  \n",
       "4  OVER-THRESHOLD      193  "
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdu_rel[\"content\"] = cdu_rel[\"title\"] +[\" \"]+ cdu_rel[\"content\"] \n",
    "\n",
    "cdu_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Weltweite Christenverfolgung weiter höchst bes...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Optimale Startchancen für unbegleitete minderj...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>EU ist keine Sozialunion   Bundessozialministe...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Recht auf Religionsfreiheit darf Integration n...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Altersfeststellung unbegleiteter minderjährige...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content\n",
       "0  012014  Weltweite Christenverfolgung weiter höchst bes...\n",
       "1  012015  Optimale Startchancen für unbegleitete minderj...\n",
       "2  012016  EU ist keine Sozialunion   Bundessozialministe...\n",
       "3  012017  Recht auf Religionsfreiheit darf Integration n...\n",
       "4  012018  Altersfeststellung unbegleiteter minderjährige..."
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdu_rel=cdu_rel.groupby(['date'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "len(cdu_rel)\n",
    "cdu_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['012014',\n",
       " '012015',\n",
       " '012016',\n",
       " '012017',\n",
       " '012018',\n",
       " '012020',\n",
       " '022014',\n",
       " '022015',\n",
       " '022016',\n",
       " '022017',\n",
       " '022018',\n",
       " '022019',\n",
       " '022020',\n",
       " '032014',\n",
       " '032016',\n",
       " '032017',\n",
       " '032019',\n",
       " '042014',\n",
       " '042015',\n",
       " '042016',\n",
       " '042017',\n",
       " '042018',\n",
       " '042019',\n",
       " '052014',\n",
       " '052015',\n",
       " '052016',\n",
       " '052019',\n",
       " '062014',\n",
       " '062015',\n",
       " '062016',\n",
       " '062017',\n",
       " '062018',\n",
       " '062019',\n",
       " '072014',\n",
       " '072015',\n",
       " '072016',\n",
       " '072017',\n",
       " '072018',\n",
       " '072019',\n",
       " '082014',\n",
       " '082015',\n",
       " '082016',\n",
       " '082017',\n",
       " '082018',\n",
       " '082019',\n",
       " '092014',\n",
       " '092015',\n",
       " '092016',\n",
       " '092017',\n",
       " '092018',\n",
       " '102014',\n",
       " '102015',\n",
       " '102016',\n",
       " '102018',\n",
       " '112014',\n",
       " '112015',\n",
       " '112016',\n",
       " '112017',\n",
       " '112018',\n",
       " '112019',\n",
       " '122013',\n",
       " '122014',\n",
       " '122015',\n",
       " '122016',\n",
       " '122017',\n",
       " '122018']"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdu_rel['date'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "cdu_rel[\"nlpprocessed\"]=cdu_rel['content'].apply(nlp_pipeline)\n",
    "cdu_rel_final=cdu_rel\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Weltweite Christenverfolgung weiter höchst bes...</td>\n",
       "      <td>[weltweite, christenverfolgung, höchst, besorg...</td>\n",
       "      <td>weltweite,christenverfolgung,höchst,besorgnise...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Optimale Startchancen für unbegleitete minderj...</td>\n",
       "      <td>[optimale, startchancen, unbegleitete, minderj...</td>\n",
       "      <td>optimale,startchancen,unbegleitete,minderjähri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>EU ist keine Sozialunion   Bundessozialministe...</td>\n",
       "      <td>[eu, sozialunion, bundessozialministerium, sch...</td>\n",
       "      <td>eu,sozialunion,bundessozialministerium,schnell...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Recht auf Religionsfreiheit darf Integration n...</td>\n",
       "      <td>[recht, religionsfreiheit, darf, integration, ...</td>\n",
       "      <td>recht,religionsfreiheit,darf,integration,ausbr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Altersfeststellung unbegleiteter minderjährige...</td>\n",
       "      <td>[altersfeststellung, unbegleiteter, minderjähr...</td>\n",
       "      <td>altersfeststellung,unbegleiteter,minderjährige...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>122014</td>\n",
       "      <td>Zuwanderer sind ein Gewinn für Deutschland   F...</td>\n",
       "      <td>[zuwanderer, gewinn, deutschland, flüchtlinge,...</td>\n",
       "      <td>zuwanderer,gewinn,deutschland,flüchtlinge,verd...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>122015</td>\n",
       "      <td>Der Einsatz in Syrien ist notwendig Reduzierun...</td>\n",
       "      <td>[einsatz, syrien, notwendig, reduzierung, flüc...</td>\n",
       "      <td>einsatz,syrien,notwendig,reduzierung,flüchtlin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>122016</td>\n",
       "      <td>Abschiebepraxis verschärfen Woran scheitern di...</td>\n",
       "      <td>[abschiebepraxis, verschärfen, woran, scheiter...</td>\n",
       "      <td>abschiebepraxis,verschärfen,woran,scheitern,ve...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>122017</td>\n",
       "      <td>Entschlossen gegen Menschenhandel und Zwangsar...</td>\n",
       "      <td>[entschlossen, menschenhandel, zwangsarbeit, v...</td>\n",
       "      <td>entschlossen,menschenhandel,zwangsarbeit,vorge...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>122018</td>\n",
       "      <td>Flüchtlingspakt ist im Interesse aller   Hilfe...</td>\n",
       "      <td>[flüchtlingspakt, interesse, hilfe, flüchtling...</td>\n",
       "      <td>flüchtlingspakt,interesse,hilfe,flüchtlinge,we...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>66 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      date                                            content  \\\n",
       "0   012014  Weltweite Christenverfolgung weiter höchst bes...   \n",
       "1   012015  Optimale Startchancen für unbegleitete minderj...   \n",
       "2   012016  EU ist keine Sozialunion   Bundessozialministe...   \n",
       "3   012017  Recht auf Religionsfreiheit darf Integration n...   \n",
       "4   012018  Altersfeststellung unbegleiteter minderjährige...   \n",
       "..     ...                                                ...   \n",
       "61  122014  Zuwanderer sind ein Gewinn für Deutschland   F...   \n",
       "62  122015  Der Einsatz in Syrien ist notwendig Reduzierun...   \n",
       "63  122016  Abschiebepraxis verschärfen Woran scheitern di...   \n",
       "64  122017  Entschlossen gegen Menschenhandel und Zwangsar...   \n",
       "65  122018  Flüchtlingspakt ist im Interesse aller   Hilfe...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [weltweite, christenverfolgung, höchst, besorg...   \n",
       "1   [optimale, startchancen, unbegleitete, minderj...   \n",
       "2   [eu, sozialunion, bundessozialministerium, sch...   \n",
       "3   [recht, religionsfreiheit, darf, integration, ...   \n",
       "4   [altersfeststellung, unbegleiteter, minderjähr...   \n",
       "..                                                ...   \n",
       "61  [zuwanderer, gewinn, deutschland, flüchtlinge,...   \n",
       "62  [einsatz, syrien, notwendig, reduzierung, flüc...   \n",
       "63  [abschiebepraxis, verschärfen, woran, scheiter...   \n",
       "64  [entschlossen, menschenhandel, zwangsarbeit, v...   \n",
       "65  [flüchtlingspakt, interesse, hilfe, flüchtling...   \n",
       "\n",
       "                                           liststring  \n",
       "0   weltweite,christenverfolgung,höchst,besorgnise...  \n",
       "1   optimale,startchancen,unbegleitete,minderjähri...  \n",
       "2   eu,sozialunion,bundessozialministerium,schnell...  \n",
       "3   recht,religionsfreiheit,darf,integration,ausbr...  \n",
       "4   altersfeststellung,unbegleiteter,minderjährige...  \n",
       "..                                                ...  \n",
       "61  zuwanderer,gewinn,deutschland,flüchtlinge,verd...  \n",
       "62  einsatz,syrien,notwendig,reduzierung,flüchtlin...  \n",
       "63  abschiebepraxis,verschärfen,woran,scheitern,ve...  \n",
       "64  entschlossen,menschenhandel,zwangsarbeit,vorge...  \n",
       "65  flüchtlingspakt,interesse,hilfe,flüchtlinge,we...  \n",
       "\n",
       "[66 rows x 4 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#convert nlpprocessed column to string\n",
    "cdu_rel_final['liststring'] = [','.join(map(str, l)) for l in cdu_rel_final['nlpprocessed']]\n",
    "cdu_rel_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Weltweite Christenverfolgung weiter höchst bes...</td>\n",
       "      <td>[weltweite, christenverfolgung, höchst, besorg...</td>\n",
       "      <td>weltweite,christenverfolgung,höchst,besorgnise...</td>\n",
       "      <td>47</td>\n",
       "      <td>38</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Optimale Startchancen für unbegleitete minderj...</td>\n",
       "      <td>[optimale, startchancen, unbegleitete, minderj...</td>\n",
       "      <td>optimale,startchancen,unbegleitete,minderjähri...</td>\n",
       "      <td>89</td>\n",
       "      <td>55</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>EU ist keine Sozialunion   Bundessozialministe...</td>\n",
       "      <td>[eu, sozialunion, bundessozialministerium, sch...</td>\n",
       "      <td>eu,sozialunion,bundessozialministerium,schnell...</td>\n",
       "      <td>2041</td>\n",
       "      <td>1085</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Recht auf Religionsfreiheit darf Integration n...</td>\n",
       "      <td>[recht, religionsfreiheit, darf, integration, ...</td>\n",
       "      <td>recht,religionsfreiheit,darf,integration,ausbr...</td>\n",
       "      <td>133</td>\n",
       "      <td>63</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Altersfeststellung unbegleiteter minderjährige...</td>\n",
       "      <td>[altersfeststellung, unbegleiteter, minderjähr...</td>\n",
       "      <td>altersfeststellung,unbegleiteter,minderjährige...</td>\n",
       "      <td>87</td>\n",
       "      <td>40</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>122014</td>\n",
       "      <td>Zuwanderer sind ein Gewinn für Deutschland   F...</td>\n",
       "      <td>[zuwanderer, gewinn, deutschland, flüchtlinge,...</td>\n",
       "      <td>zuwanderer,gewinn,deutschland,flüchtlinge,verd...</td>\n",
       "      <td>141</td>\n",
       "      <td>85</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>122015</td>\n",
       "      <td>Der Einsatz in Syrien ist notwendig Reduzierun...</td>\n",
       "      <td>[einsatz, syrien, notwendig, reduzierung, flüc...</td>\n",
       "      <td>einsatz,syrien,notwendig,reduzierung,flüchtlin...</td>\n",
       "      <td>172</td>\n",
       "      <td>142</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>122016</td>\n",
       "      <td>Abschiebepraxis verschärfen Woran scheitern di...</td>\n",
       "      <td>[abschiebepraxis, verschärfen, woran, scheiter...</td>\n",
       "      <td>abschiebepraxis,verschärfen,woran,scheitern,ve...</td>\n",
       "      <td>316</td>\n",
       "      <td>142</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>122017</td>\n",
       "      <td>Entschlossen gegen Menschenhandel und Zwangsar...</td>\n",
       "      <td>[entschlossen, menschenhandel, zwangsarbeit, v...</td>\n",
       "      <td>entschlossen,menschenhandel,zwangsarbeit,vorge...</td>\n",
       "      <td>69</td>\n",
       "      <td>42</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>122018</td>\n",
       "      <td>Flüchtlingspakt ist im Interesse aller   Hilfe...</td>\n",
       "      <td>[flüchtlingspakt, interesse, hilfe, flüchtling...</td>\n",
       "      <td>flüchtlingspakt,interesse,hilfe,flüchtlinge,we...</td>\n",
       "      <td>154</td>\n",
       "      <td>64</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>66 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      date                                            content  \\\n",
       "0   012014  Weltweite Christenverfolgung weiter höchst bes...   \n",
       "1   012015  Optimale Startchancen für unbegleitete minderj...   \n",
       "2   012016  EU ist keine Sozialunion   Bundessozialministe...   \n",
       "3   012017  Recht auf Religionsfreiheit darf Integration n...   \n",
       "4   012018  Altersfeststellung unbegleiteter minderjährige...   \n",
       "..     ...                                                ...   \n",
       "61  122014  Zuwanderer sind ein Gewinn für Deutschland   F...   \n",
       "62  122015  Der Einsatz in Syrien ist notwendig Reduzierun...   \n",
       "63  122016  Abschiebepraxis verschärfen Woran scheitern di...   \n",
       "64  122017  Entschlossen gegen Menschenhandel und Zwangsar...   \n",
       "65  122018  Flüchtlingspakt ist im Interesse aller   Hilfe...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [weltweite, christenverfolgung, höchst, besorg...   \n",
       "1   [optimale, startchancen, unbegleitete, minderj...   \n",
       "2   [eu, sozialunion, bundessozialministerium, sch...   \n",
       "3   [recht, religionsfreiheit, darf, integration, ...   \n",
       "4   [altersfeststellung, unbegleiteter, minderjähr...   \n",
       "..                                                ...   \n",
       "61  [zuwanderer, gewinn, deutschland, flüchtlinge,...   \n",
       "62  [einsatz, syrien, notwendig, reduzierung, flüc...   \n",
       "63  [abschiebepraxis, verschärfen, woran, scheiter...   \n",
       "64  [entschlossen, menschenhandel, zwangsarbeit, v...   \n",
       "65  [flüchtlingspakt, interesse, hilfe, flüchtling...   \n",
       "\n",
       "                                           liststring  positive words  \\\n",
       "0   weltweite,christenverfolgung,höchst,besorgnise...              47   \n",
       "1   optimale,startchancen,unbegleitete,minderjähri...              89   \n",
       "2   eu,sozialunion,bundessozialministerium,schnell...            2041   \n",
       "3   recht,religionsfreiheit,darf,integration,ausbr...             133   \n",
       "4   altersfeststellung,unbegleiteter,minderjährige...              87   \n",
       "..                                                ...             ...   \n",
       "61  zuwanderer,gewinn,deutschland,flüchtlinge,verd...             141   \n",
       "62  einsatz,syrien,notwendig,reduzierung,flüchtlin...             172   \n",
       "63  abschiebepraxis,verschärfen,woran,scheitern,ve...             316   \n",
       "64  entschlossen,menschenhandel,zwangsarbeit,vorge...              69   \n",
       "65  flüchtlingspakt,interesse,hilfe,flüchtlinge,we...             154   \n",
       "\n",
       "    negative words   overall  \n",
       "0               38  positive  \n",
       "1               55  positive  \n",
       "2             1085  positive  \n",
       "3               63  positive  \n",
       "4               40  positive  \n",
       "..             ...       ...  \n",
       "61              85  positive  \n",
       "62             142  positive  \n",
       "63             142  positive  \n",
       "64              42  positive  \n",
       "65              64  positive  \n",
       "\n",
       "[66 rows x 7 columns]"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cdu_rel_final['positive words'] = cdu_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "cdu_rel_final['negative words'] = cdu_rel_final['liststring'].str.count('|'.join(negative_list1))\n",
    "\n",
    "\n",
    "conditions = [\n",
    "(cdu_rel_final['positive words'] > cdu_rel_final['negative words']),\n",
    "(cdu_rel_final['negative words'] > cdu_rel_final['positive words']),\n",
    "(cdu_rel_final['negative words'] == cdu_rel_final['positive words'])\n",
    "]\n",
    "\n",
    "choices = [\n",
    "'positive',\n",
    "'negative',\n",
    "'neutral'\n",
    "]\n",
    "\n",
    "cdu_rel_final['overall'] = np.select(conditions, choices, default = '')\n",
    "\n",
    "cdu_rel_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "#clean data\n",
    "del cdu_rel_final['nlpprocessed']\n",
    "cdu_rel_final = cdu_rel_final.rename(columns={'content': 'original_title_text', 'liststring': 'text_procssed_text',\"date\":\"month_year\",\"overall\":\"overall_sentiment\"})\n",
    "cdu_rel_final.head()\n",
    "cdu_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/cdu_sentiment.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# fdp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TEUTEBERG: Integrationsbeauftragte sollen sich...</td>\n",
       "      <td>092018</td>\n",
       "      <td>Zum Treffen der Integrationsbeauftragten des B...</td>\n",
       "      <td>[0.43100654]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>THOMAE: Union und SPD haben Einwanderungsgeset...</td>\n",
       "      <td>012020</td>\n",
       "      <td>Zum Kabinettsbeschluss des Nationalen Aktionsp...</td>\n",
       "      <td>[0.49923529]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>BUSCHMANN-Interview: Wir sind für ein Einwande...</td>\n",
       "      <td>102017</td>\n",
       "      <td>Der Erste Parlamentarische Geschäftsführer der...</td>\n",
       "      <td>[0.37095062]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>THOMAE: Seehofer muss Weichen für europäische ...</td>\n",
       "      <td>072019</td>\n",
       "      <td>Zur Forderung von EU-Innenkommissar Avramopoul...</td>\n",
       "      <td>[0.37498003]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>DÜRR-Gastbeitrag: Wir müssen aus dem Flüchtlin...</td>\n",
       "      <td>032020</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzende ...</td>\n",
       "      <td>[0.46131766]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>521</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>LAMBSDORFF: Syrien-Gipfel muss dauerhafte Waff...</td>\n",
       "      <td>022020</td>\n",
       "      <td>Zur Lage in Syrien erklärt der stellvertretend...</td>\n",
       "      <td>[0.39914373]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>BUSCHMANN-Interview: Protestwähler einfach übe...</td>\n",
       "      <td>012019</td>\n",
       "      <td>Der Erste Parlamentarische Geschäftsführer der...</td>\n",
       "      <td>[0.38493332]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>536</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>LINDNER-Interview: Ich gebe keine Seele verloren</td>\n",
       "      <td>012019</td>\n",
       "      <td>Der FDP-Fraktionsvorsitzende Christian Lindner...</td>\n",
       "      <td>[0.36518976]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>3119</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>TEUTEBERG-Statement: Fachkräfteeinwanderungsge...</td>\n",
       "      <td>122018</td>\n",
       "      <td>Die migrationspolitische Sprecherin der FDP-Fr...</td>\n",
       "      <td>[0.36321636]</td>\n",
       "      <td>title</td>\n",
       "      <td>559</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>THOMAE: Unstrittige Teile von Dublin IV müssen...</td>\n",
       "      <td>122018</td>\n",
       "      <td>Zum Treffen der EU-Innenminister erklärt der s...</td>\n",
       "      <td>[0.41374124]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>144</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>64 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                title    date  \\\n",
       "0   TEUTEBERG: Integrationsbeauftragte sollen sich...  092018   \n",
       "1   THOMAE: Union und SPD haben Einwanderungsgeset...  012020   \n",
       "2   BUSCHMANN-Interview: Wir sind für ein Einwande...  102017   \n",
       "3   THOMAE: Seehofer muss Weichen für europäische ...  072019   \n",
       "4   DÜRR-Gastbeitrag: Wir müssen aus dem Flüchtlin...  032020   \n",
       "..                                                ...     ...   \n",
       "59  LAMBSDORFF: Syrien-Gipfel muss dauerhafte Waff...  022020   \n",
       "60  BUSCHMANN-Interview: Protestwähler einfach übe...  012019   \n",
       "61   LINDNER-Interview: Ich gebe keine Seele verloren  012019   \n",
       "62  TEUTEBERG-Statement: Fachkräfteeinwanderungsge...  122018   \n",
       "63  THOMAE: Unstrittige Teile von Dublin IV müssen...  122018   \n",
       "\n",
       "                                              content         score  \\\n",
       "0   Zum Treffen der Integrationsbeauftragten des B...  [0.43100654]   \n",
       "1   Zum Kabinettsbeschluss des Nationalen Aktionsp...  [0.49923529]   \n",
       "2   Der Erste Parlamentarische Geschäftsführer der...  [0.37095062]   \n",
       "3   Zur Forderung von EU-Innenkommissar Avramopoul...  [0.37498003]   \n",
       "4   Der stellvertretende FDP-Fraktionsvorsitzende ...  [0.46131766]   \n",
       "..                                                ...           ...   \n",
       "59  Zur Lage in Syrien erklärt der stellvertretend...  [0.39914373]   \n",
       "60  Der Erste Parlamentarische Geschäftsführer der...  [0.38493332]   \n",
       "61  Der FDP-Fraktionsvorsitzende Christian Lindner...  [0.36518976]   \n",
       "62  Die migrationspolitische Sprecherin der FDP-Fr...  [0.36321636]   \n",
       "63  Zum Treffen der EU-Innenminister erklärt der s...  [0.41374124]   \n",
       "\n",
       "      how_included  n_words  \n",
       "0   OVER-THRESHOLD      113  \n",
       "1   OVER-THRESHOLD      136  \n",
       "2   OVER-THRESHOLD      682  \n",
       "3   OVER-THRESHOLD      157  \n",
       "4   OVER-THRESHOLD      521  \n",
       "..             ...      ...  \n",
       "59  OVER-THRESHOLD      133  \n",
       "60  OVER-THRESHOLD      536  \n",
       "61  OVER-THRESHOLD     3119  \n",
       "62           title      559  \n",
       "63  OVER-THRESHOLD      144  \n",
       "\n",
       "[64 rows x 6 columns]"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fdp_rel = pd.read_csv(\"/Users/ashrakatelshehawy/fdp_refugeerelevant.csv\",header=None, encoding='utf-8',delimiter='\\t',error_bad_lines=False)\n",
    "fdp_rel.columns = ['title', 'date',\"content\",\"score\",\"how_included\",\"n_words\"]\n",
    "fdp_rel['date'] = fdp_rel['date'].apply(lambda x: '{0:0>6}'.format(x))\n",
    "\n",
    "fdp_rel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TEUTEBERG: Integrationsbeauftragte sollen sich...</td>\n",
       "      <td>092018</td>\n",
       "      <td>Zum Treffen der Integrationsbeauftragten des B...</td>\n",
       "      <td>[0.43100654]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>THOMAE: Union macht gut integrierte Flüchtling...</td>\n",
       "      <td>042019</td>\n",
       "      <td>Zu den geplanten Gesetzen zu Abschiebungen und...</td>\n",
       "      <td>[0.39052529]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>123</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>SOLMS-Gastbeitrag: Lösungen statt leerer Versp...</td>\n",
       "      <td>072018</td>\n",
       "      <td>Der Ehrenvorsitzende der FDP-Fraktion Dr. Herm...</td>\n",
       "      <td>[0.37069263]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>452</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>THOMAE-Gastbeitrag: Kopftuchverbot für Mädchen...</td>\n",
       "      <td>052019</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzende ...</td>\n",
       "      <td>[0.41531358]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>215</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>DJIR-SARAI: Humanitäre Katastrophe in Syrien a...</td>\n",
       "      <td>092018</td>\n",
       "      <td>Zum Treffen der Präsidenten Putin und Erdogan ...</td>\n",
       "      <td>[0.36833623]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>110</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>LAMBSDORFF-Interview: Eine sichere Grenze und ...</td>\n",
       "      <td>032020</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzende ...</td>\n",
       "      <td>[0.37307199]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>LAMBSDORFF-Interview: Wir Europäer dürfen uns ...</td>\n",
       "      <td>032020</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzende ...</td>\n",
       "      <td>[0.39219391]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>LINDNER-Statement: Grundrente ohne Bedürftigke...</td>\n",
       "      <td>112019</td>\n",
       "      <td>Der FDP-Fraktionsvorsitzende Christian Lindner...</td>\n",
       "      <td>[0.3667593]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>805</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>LAMBSDORFF-Interview: Es ist eine eindeutig vö...</td>\n",
       "      <td>102019</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzende ...</td>\n",
       "      <td>[0.40171996]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>THOMAE-Interview: Bundesregierung hat das Them...</td>\n",
       "      <td>112019</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzenden...</td>\n",
       "      <td>[0.39625729]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>THOMAE: EU-Innenminister müssen über Weiterent...</td>\n",
       "      <td>032020</td>\n",
       "      <td>Zum Sondertreffen der EU-Innenminister erklärt...</td>\n",
       "      <td>[0.39457092]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>THOMAE: Spurwechsel wichtiger Schritt zu besse...</td>\n",
       "      <td>022019</td>\n",
       "      <td>Zur Abstimmung des Bundesrats über Fachkräftee...</td>\n",
       "      <td>[0.39376306]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>THOMAE: Seehofer muss Weichen für europäische ...</td>\n",
       "      <td>072019</td>\n",
       "      <td>Zur Forderung von EU-Innenkommissar Avramopoul...</td>\n",
       "      <td>[0.37498003]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>LINDNER-Statement: Es darf keinen Kontrollverl...</td>\n",
       "      <td>032020</td>\n",
       "      <td>Der FDP-Fraktionsvorsitzende Christian Lindner...</td>\n",
       "      <td>[0.36509714]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>859</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>LINDNER/BUSCHMANN-Statement: Aus dem Kassenabs...</td>\n",
       "      <td>052019</td>\n",
       "      <td>Der FDP-Fraktionsvorsitzende Christian Lindner...</td>\n",
       "      <td>[0.37225481]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>LINDNER-Interview: Klimaschutz ist was für Profis</td>\n",
       "      <td>032019</td>\n",
       "      <td>Der FDP-Fraktionsvorsitzende Christian Lindner...</td>\n",
       "      <td>[0.36536819]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>944</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>TEUTEBERG-Statement: Eckpunktepapier bleibt we...</td>\n",
       "      <td>082018</td>\n",
       "      <td>Die migrationspolitische Sprecherin der FDP-Fr...</td>\n",
       "      <td>[0.41889214]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>472</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>LINDNER-Interview: Wir sind jederzeit bereit</td>\n",
       "      <td>062018</td>\n",
       "      <td>Der FDP-Fraktionsvorsitzende Christian Lindner...</td>\n",
       "      <td>[0.36516688]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>861</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>LAMBSDORFF-Interview: Vertrauen braucht Zeit</td>\n",
       "      <td>102017</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzende ...</td>\n",
       "      <td>[0.38013012]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1522</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>LAMBSDORFF: EU braucht endlich wirksame Grenzs...</td>\n",
       "      <td>122017</td>\n",
       "      <td>Zum EU-Gipfel erklärt der stellvertretende FDP...</td>\n",
       "      <td>[0.45311206]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>LAMBSDORFF-Interview: Gibt keinen Widerspruch ...</td>\n",
       "      <td>032020</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzende ...</td>\n",
       "      <td>[0.43493762]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>558</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>LINDNER-Interview: Ich vermute ein Systemversagen</td>\n",
       "      <td>062018</td>\n",
       "      <td>Der FDP-Fraktionsvorsitzende Christian Lindner...</td>\n",
       "      <td>[0.40312485]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>RUPPERT-Interview: Bekämpfung von Antisemitism...</td>\n",
       "      <td>012018</td>\n",
       "      <td>Der Parlamentarische Geschäftsführer der FDP-F...</td>\n",
       "      <td>[0.37320883]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>919</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>THOMAE-Gastbeitrag: Deutschland braucht ein in...</td>\n",
       "      <td>072018</td>\n",
       "      <td>Der stellvertretende FDP-Fraktionsvorsitzende ...</td>\n",
       "      <td>[0.36759491]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>LINDNER-Interview: Ich gebe keine Seele verloren</td>\n",
       "      <td>012019</td>\n",
       "      <td>Der FDP-Fraktionsvorsitzende Christian Lindner...</td>\n",
       "      <td>[0.36518976]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>3119</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                title    date  \\\n",
       "0   TEUTEBERG: Integrationsbeauftragte sollen sich...  092018   \n",
       "6   THOMAE: Union macht gut integrierte Flüchtling...  042019   \n",
       "21  SOLMS-Gastbeitrag: Lösungen statt leerer Versp...  072018   \n",
       "49  THOMAE-Gastbeitrag: Kopftuchverbot für Mädchen...  052019   \n",
       "11  DJIR-SARAI: Humanitäre Katastrophe in Syrien a...  092018   \n",
       "54  LAMBSDORFF-Interview: Eine sichere Grenze und ...  032020   \n",
       "53  LAMBSDORFF-Interview: Wir Europäer dürfen uns ...  032020   \n",
       "44  LINDNER-Statement: Grundrente ohne Bedürftigke...  112019   \n",
       "45  LAMBSDORFF-Interview: Es ist eine eindeutig vö...  102019   \n",
       "41  THOMAE-Interview: Bundesregierung hat das Them...  112019   \n",
       "55  THOMAE: EU-Innenminister müssen über Weiterent...  032020   \n",
       "58  THOMAE: Spurwechsel wichtiger Schritt zu besse...  022019   \n",
       "3   THOMAE: Seehofer muss Weichen für europäische ...  072019   \n",
       "7   LINDNER-Statement: Es darf keinen Kontrollverl...  032020   \n",
       "50  LINDNER/BUSCHMANN-Statement: Aus dem Kassenabs...  052019   \n",
       "56  LINDNER-Interview: Klimaschutz ist was für Profis  032019   \n",
       "14  TEUTEBERG-Statement: Eckpunktepapier bleibt we...  082018   \n",
       "23       LINDNER-Interview: Wir sind jederzeit bereit  062018   \n",
       "40       LAMBSDORFF-Interview: Vertrauen braucht Zeit  102017   \n",
       "39  LAMBSDORFF: EU braucht endlich wirksame Grenzs...  122017   \n",
       "48  LAMBSDORFF-Interview: Gibt keinen Widerspruch ...  032020   \n",
       "28  LINDNER-Interview: Ich vermute ein Systemversagen  062018   \n",
       "35  RUPPERT-Interview: Bekämpfung von Antisemitism...  012018   \n",
       "18  THOMAE-Gastbeitrag: Deutschland braucht ein in...  072018   \n",
       "61   LINDNER-Interview: Ich gebe keine Seele verloren  012019   \n",
       "\n",
       "                                              content         score  \\\n",
       "0   Zum Treffen der Integrationsbeauftragten des B...  [0.43100654]   \n",
       "6   Zu den geplanten Gesetzen zu Abschiebungen und...  [0.39052529]   \n",
       "21  Der Ehrenvorsitzende der FDP-Fraktion Dr. Herm...  [0.37069263]   \n",
       "49  Der stellvertretende FDP-Fraktionsvorsitzende ...  [0.41531358]   \n",
       "11  Zum Treffen der Präsidenten Putin und Erdogan ...  [0.36833623]   \n",
       "54  Der stellvertretende FDP-Fraktionsvorsitzende ...  [0.37307199]   \n",
       "53  Der stellvertretende FDP-Fraktionsvorsitzende ...  [0.39219391]   \n",
       "44  Der FDP-Fraktionsvorsitzende Christian Lindner...   [0.3667593]   \n",
       "45  Der stellvertretende FDP-Fraktionsvorsitzende ...  [0.40171996]   \n",
       "41  Der stellvertretende FDP-Fraktionsvorsitzenden...  [0.39625729]   \n",
       "55  Zum Sondertreffen der EU-Innenminister erklärt...  [0.39457092]   \n",
       "58  Zur Abstimmung des Bundesrats über Fachkräftee...  [0.39376306]   \n",
       "3   Zur Forderung von EU-Innenkommissar Avramopoul...  [0.37498003]   \n",
       "7   Der FDP-Fraktionsvorsitzende Christian Lindner...  [0.36509714]   \n",
       "50  Der FDP-Fraktionsvorsitzende Christian Lindner...  [0.37225481]   \n",
       "56  Der FDP-Fraktionsvorsitzende Christian Lindner...  [0.36536819]   \n",
       "14  Die migrationspolitische Sprecherin der FDP-Fr...  [0.41889214]   \n",
       "23  Der FDP-Fraktionsvorsitzende Christian Lindner...  [0.36516688]   \n",
       "40  Der stellvertretende FDP-Fraktionsvorsitzende ...  [0.38013012]   \n",
       "39  Zum EU-Gipfel erklärt der stellvertretende FDP...  [0.45311206]   \n",
       "48  Der stellvertretende FDP-Fraktionsvorsitzende ...  [0.43493762]   \n",
       "28  Der FDP-Fraktionsvorsitzende Christian Lindner...  [0.40312485]   \n",
       "35  Der Parlamentarische Geschäftsführer der FDP-F...  [0.37320883]   \n",
       "18  Der stellvertretende FDP-Fraktionsvorsitzende ...  [0.36759491]   \n",
       "61  Der FDP-Fraktionsvorsitzende Christian Lindner...  [0.36518976]   \n",
       "\n",
       "      how_included  n_words  \n",
       "0   OVER-THRESHOLD      113  \n",
       "6   OVER-THRESHOLD      123  \n",
       "21  OVER-THRESHOLD      452  \n",
       "49  OVER-THRESHOLD      215  \n",
       "11  OVER-THRESHOLD      110  \n",
       "54  OVER-THRESHOLD      965  \n",
       "53  OVER-THRESHOLD      390  \n",
       "44  OVER-THRESHOLD      805  \n",
       "45  OVER-THRESHOLD      733  \n",
       "41  OVER-THRESHOLD     1184  \n",
       "55  OVER-THRESHOLD      157  \n",
       "58  OVER-THRESHOLD      126  \n",
       "3   OVER-THRESHOLD      157  \n",
       "7   OVER-THRESHOLD      859  \n",
       "50  OVER-THRESHOLD     1264  \n",
       "56  OVER-THRESHOLD      944  \n",
       "14  OVER-THRESHOLD      472  \n",
       "23  OVER-THRESHOLD      861  \n",
       "40  OVER-THRESHOLD     1522  \n",
       "39  OVER-THRESHOLD      168  \n",
       "48  OVER-THRESHOLD      558  \n",
       "28  OVER-THRESHOLD     1127  \n",
       "35  OVER-THRESHOLD      919  \n",
       "18  OVER-THRESHOLD      491  \n",
       "61  OVER-THRESHOLD     3119  "
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fdp_rel_sample=fdp_rel.sample(25)\n",
    "fdp_rel_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['092018',\n",
       " '012020',\n",
       " '102017',\n",
       " '072019',\n",
       " '032020',\n",
       " '032020',\n",
       " '042019',\n",
       " '032020',\n",
       " '022020',\n",
       " '112018',\n",
       " '092018',\n",
       " '092018',\n",
       " '092018',\n",
       " '082018',\n",
       " '082018',\n",
       " '082018',\n",
       " '082018',\n",
       " '082018',\n",
       " '072018',\n",
       " '072018',\n",
       " '072018',\n",
       " '072018',\n",
       " '072018',\n",
       " '062018',\n",
       " '062018',\n",
       " '062018',\n",
       " '062018',\n",
       " '062018',\n",
       " '062018',\n",
       " '052018',\n",
       " '042018',\n",
       " '012020',\n",
       " '042018',\n",
       " '032018',\n",
       " '012020',\n",
       " '012018',\n",
       " '012018',\n",
       " '012018',\n",
       " '122017',\n",
       " '122017',\n",
       " '102017',\n",
       " '112019',\n",
       " '112019',\n",
       " '112019',\n",
       " '112019',\n",
       " '102019',\n",
       " '102019',\n",
       " '072019',\n",
       " '032020',\n",
       " '052019',\n",
       " '052019',\n",
       " '032020',\n",
       " '042019',\n",
       " '032020',\n",
       " '032020',\n",
       " '032020',\n",
       " '032019',\n",
       " '032020',\n",
       " '022019',\n",
       " '022020',\n",
       " '012019',\n",
       " '012019',\n",
       " '122018',\n",
       " '122018']"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fdp_rel[\"content\"] = fdp_rel[\"title\"] +[\" \"]+ fdp_rel[\"content\"]\n",
    "\n",
    "fdp_rel['date'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['012018',\n",
       " '012019',\n",
       " '012020',\n",
       " '022019',\n",
       " '022020',\n",
       " '032018',\n",
       " '032019',\n",
       " '032020',\n",
       " '042018',\n",
       " '042019',\n",
       " '052018',\n",
       " '052019',\n",
       " '062018',\n",
       " '072018',\n",
       " '072019',\n",
       " '082018',\n",
       " '092018',\n",
       " '102017',\n",
       " '102019',\n",
       " '112018',\n",
       " '112019',\n",
       " '122017',\n",
       " '122018']"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collapse the frame to one row per 'date' value; the texts of each group are\n",
    "# concatenated with commas and the group key becomes a regular column again.\n",
    "fdp_rel = (\n",
    "    fdp_rel\n",
    "    .groupby(['date'])['content']\n",
    "    .apply(','.join)\n",
    "    .reset_index()\n",
    ")\n",
    "fdp_rel['date'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run every aggregated text through the preprocessing function defined earlier.\n",
    "fdp_rel[\"nlpprocessed\"] = fdp_rel['content'].apply(nlp_pipeline)\n",
    "# Plain alias, not a copy: columns added to fdp_rel_final below also appear on fdp_rel.\n",
    "fdp_rel_final = fdp_rel\n",
    "\n",
    "# Flatten each processed sequence into a single comma-separated string.\n",
    "fdp_rel_final['liststring'] = [\n",
    "    ','.join(str(token) for token in tokens)\n",
    "    for tokens in fdp_rel_final['nlpprocessed']\n",
    "]\n",
    "\n",
    "# Count regex matches of the positive / negative word lists inside each string.\n",
    "# NOTE(review): the lists are joined with '|' unescaped and without word boundaries,\n",
    "# so an entry also matches inside a longer token - presumably intended; confirm\n",
    "# before changing, since it affects every score.\n",
    "positive_hits = fdp_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "negative_hits = fdp_rel_final['liststring'].str.count('|'.join(negative_list1))\n",
    "fdp_rel_final['positive words'] = positive_hits\n",
    "fdp_rel_final['negative words'] = negative_hits\n",
    "\n",
    "# Label each row by whichever count is larger; equal counts are labelled 'neutral'.\n",
    "conditions = [\n",
    "    positive_hits > negative_hits,\n",
    "    negative_hits > positive_hits,\n",
    "    negative_hits == positive_hits,\n",
    "]\n",
    "choices = ['positive', 'negative', 'neutral']\n",
    "fdp_rel_final['overall'] = np.select(conditions, choices, default='')\n",
    "\n",
    "# Drop the intermediate column, rename the rest for export, and write the CSV.\n",
    "# The header spelling 'text_procssed_text' is reproduced unchanged; anything that\n",
    "# reads this CSV may depend on it - TODO confirm before correcting.\n",
    "del fdp_rel_final['nlpprocessed']\n",
    "fdp_rel_final = fdp_rel_final.rename(columns={\n",
    "    'content': 'original_title_text',\n",
    "    'liststring': 'text_procssed_text',\n",
    "    'date': 'month_year',\n",
    "    'overall': 'overall_sentiment',\n",
    "})\n",
    "fdp_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/fdp_sentiment.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# greens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Anti-Folter-Konvention endlich umsetzen</td>\n",
       "      <td>62014</td>\n",
       "      <td>Anlässlich des Internationalen Tages zur Unter...</td>\n",
       "      <td>[0.60272454]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Mehr syrische Flüchtlinge zügig aufnehmen</td>\n",
       "      <td>22014</td>\n",
       "      <td>Zu den Ankündigungen einiger Landesinnenminist...</td>\n",
       "      <td>[0.57381765]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>137</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Kinderrechte: Frau Schwesig muss endlich Farbe...</td>\n",
       "      <td>62014</td>\n",
       "      <td>Anlässlich des UNICEF-Reports zum 25. Jahresta...</td>\n",
       "      <td>[0.57006574]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AnKER-Zentren verletzen Kinderrechte</td>\n",
       "      <td>52018</td>\n",
       "      <td>Anlässlich des Internationalen Kindertages erk...</td>\n",
       "      <td>[0.56043389]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>141</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Aus dem Schatten ins Licht: Armut muss stärker...</td>\n",
       "      <td>102018</td>\n",
       "      <td>Anlässlich des heute erschienenen Schattenberi...</td>\n",
       "      <td>[0.55265271]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>223</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   0       1  \\\n",
       "0            Anti-Folter-Konvention endlich umsetzen   62014   \n",
       "1          Mehr syrische Flüchtlinge zügig aufnehmen   22014   \n",
       "2  Kinderrechte: Frau Schwesig muss endlich Farbe...   62014   \n",
       "3               AnKER-Zentren verletzen Kinderrechte   52018   \n",
       "4  Aus dem Schatten ins Licht: Armut muss stärker...  102018   \n",
       "\n",
       "                                                   2             3  \\\n",
       "0  Anlässlich des Internationalen Tages zur Unter...  [0.60272454]   \n",
       "1  Zu den Ankündigungen einiger Landesinnenminist...  [0.57381765]   \n",
       "2  Anlässlich des UNICEF-Reports zum 25. Jahresta...  [0.57006574]   \n",
       "3  Anlässlich des Internationalen Kindertages erk...  [0.56043389]   \n",
       "4  Anlässlich des heute erschienenen Schattenberi...  [0.55265271]   \n",
       "\n",
       "                4    5  \n",
       "0  OVER-THRESHOLD  249  \n",
       "1  OVER-THRESHOLD  137  \n",
       "2  OVER-THRESHOLD  197  \n",
       "3  OVER-THRESHOLD  141  \n",
       "4  OVER-THRESHOLD  223  "
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the refugee-relevant Greens file: tab-separated, UTF-8, no header row.\n",
    "# Malformed rows are skipped with a warning instead of aborting the load.\n",
    "# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0; its replacement\n",
    "# is `on_bad_lines`, so pick whichever keyword the installed pandas accepts.\n",
    "pandas_major_minor = tuple(int(part) for part in pd.__version__.split('.')[:2])\n",
    "if pandas_major_minor >= (1, 3):\n",
    "    bad_line_option = {'on_bad_lines': 'warn'}\n",
    "else:\n",
    "    bad_line_option = {'error_bad_lines': False}\n",
    "greens_rel = pd.read_csv(\n",
    "    \"/Users/ashrakatelshehawy/greens_refugeerelevant.csv\",\n",
    "    header=None,\n",
    "    encoding='utf-8',\n",
    "    delimiter='\\t',\n",
    "    **bad_line_option\n",
    ")\n",
    "greens_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Give the six positional columns (the file was read without a header row) names.\n",
    "greens_rel.columns = [\n",
    "    'title',\n",
    "    'date',\n",
    "    'content',\n",
    "    'score',\n",
    "    'how_included',\n",
    "    'n_words',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>220</th>\n",
       "      <td>Asylsuchende auf ihrem Weg in Arbeit unterstützen</td>\n",
       "      <td>52015</td>\n",
       "      <td>Zu der heute veröffentlichten Studie der Berte...</td>\n",
       "      <td>[0.38577597]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>130</th>\n",
       "      <td>Trauerspiel auf dem Mittelmeer beenden</td>\n",
       "      <td>12019</td>\n",
       "      <td>Zur heute vom UNHCR veröffentlichten Zahl von ...</td>\n",
       "      <td>[0.4216547]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>Flüchtlingsschutz: Verantwortung gerecht werden</td>\n",
       "      <td>32014</td>\n",
       "      <td>Anlässlich der Rede von Bundespräsident Gauck ...</td>\n",
       "      <td>[0.41882924]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>Beschleunigte Asylverfahren: Einschränkungen s...</td>\n",
       "      <td>112014</td>\n",
       "      <td>Zu den angekündigten Verfahrenserleichterungen...</td>\n",
       "      <td>[0.48179979]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>206</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>152</th>\n",
       "      <td>Fluchtursachen verringern statt Flüchtlinge be...</td>\n",
       "      <td>72015</td>\n",
       "      <td>Zu dem heute geplanten ARD Monitor-Bericht dem...</td>\n",
       "      <td>[0.40912572]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Aktuelle Stunde zur Flüchtlingspolitik beantragt</td>\n",
       "      <td>112015</td>\n",
       "      <td>Wir haben heute eine Aktuelle Stunde zum Thema...</td>\n",
       "      <td>[0.50041898]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>108</th>\n",
       "      <td>Frieden schaffen statt Mauern bauen</td>\n",
       "      <td>82014</td>\n",
       "      <td>Anlässlich des 53. Jahrestages des Mauerbaus e...</td>\n",
       "      <td>[0.43647593]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>223</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>155</th>\n",
       "      <td>Arbeitsmarkt: Menschen mit Migrationshintergru...</td>\n",
       "      <td>102014</td>\n",
       "      <td>Zu der heute vorgestellten IAB-Studie zur Arbe...</td>\n",
       "      <td>[0.4074729]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>186</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Weltflüchtlingstag: Solidarität mit schutzsuch...</td>\n",
       "      <td>62016</td>\n",
       "      <td>Zum Weltflüchtlingstag am 20.6.2016 erklärt Lu...</td>\n",
       "      <td>[0.5115617]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>Bei Internationalen Flüchtlingsgipfeln für Wil...</td>\n",
       "      <td>92016</td>\n",
       "      <td>Zu den Flüchtlingsgipfeln der Vereinten Nation...</td>\n",
       "      <td>[0.39186595]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>379</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>Keine Abkommen mit diktatorischen Staaten zula...</td>\n",
       "      <td>102016</td>\n",
       "      <td>Zur flüchtlingspolitischen Debatte im heute be...</td>\n",
       "      <td>[0.44608472]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158</th>\n",
       "      <td>Statements von Anton Hofreiter und Katrin Göri...</td>\n",
       "      <td>92015</td>\n",
       "      <td>Auszüge aus dem TV- Statement von Anton Hofrei...</td>\n",
       "      <td>[0.40666575]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>1095</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Sofortmaßnahmen für syrische Flüchtlinge erfor...</td>\n",
       "      <td>122014</td>\n",
       "      <td>Zur Syrienkonferenz der UN-Flüchtlingswerks in...</td>\n",
       "      <td>[0.5189805]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>325</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>156</th>\n",
       "      <td>Statement Katrin Göring-Eckardt zu Flüchtlinge...</td>\n",
       "      <td>42015</td>\n",
       "      <td>Das Flüchtlingsdrama auf dem Mittelmeer weitet...</td>\n",
       "      <td>[0.40725253]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>200</th>\n",
       "      <td>Bundesregierung muss sich für Deeskalation in ...</td>\n",
       "      <td>22020</td>\n",
       "      <td>Zur Offensive der syrischen Armee auf Idlib er...</td>\n",
       "      <td>[0.39306209]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>112</th>\n",
       "      <td>20 Jahre Srebrenica: Das Grauen mitten in Europa</td>\n",
       "      <td>72015</td>\n",
       "      <td>Zum Gedenken an den Völkermord in Srebrenica e...</td>\n",
       "      <td>[0.43578405]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>Bleiberecht: Zuckerbrot-und-Peitsche-Politik b...</td>\n",
       "      <td>122014</td>\n",
       "      <td>Zu den bekanntgewordenen Inhalten eines überar...</td>\n",
       "      <td>[0.47286533]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>Einwanderungsgesetz ohne Einwanderung</td>\n",
       "      <td>112018</td>\n",
       "      <td>Zum Referentenentwurf eines Fachkräfteeinwande...</td>\n",
       "      <td>[0.39341042]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>De Maizière handelt unverantwortlich</td>\n",
       "      <td>62016</td>\n",
       "      <td>Wir haben heute eine Aktuelle Stunde mit dem T...</td>\n",
       "      <td>[0.38946776]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>169</th>\n",
       "      <td>Global Compact for Migration: Rückzug Österrei...</td>\n",
       "      <td>112018</td>\n",
       "      <td>Zum Rückzug Österreichs aus dem Global Compact...</td>\n",
       "      <td>[0.40262393]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>216</th>\n",
       "      <td>Integrationsgipfel - Bekenntnis zum Einwanderu...</td>\n",
       "      <td>62018</td>\n",
       "      <td>Zum Integrationsgipfel erklärt Filiz Polat, Sp...</td>\n",
       "      <td>[0.3877078]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>243</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>188</th>\n",
       "      <td>Demokratiefördergesetz jetzt!</td>\n",
       "      <td>102019</td>\n",
       "      <td>Zu den aktuellen Umstrukturierungen beim Bunde...</td>\n",
       "      <td>[0.39573753]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>Start des Basiskontos: Asylsuchende werden imm...</td>\n",
       "      <td>62016</td>\n",
       "      <td>Zum Inkrafttreten des Zahlungskontengesetzes a...</td>\n",
       "      <td>[0.48630474]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>201</th>\n",
       "      <td>NSU-Mordserie: Von Konsequenzen ist zwei Jahre...</td>\n",
       "      <td>112013</td>\n",
       "      <td>Zum zweiten Jahrestag des Bekanntwerdens der N...</td>\n",
       "      <td>[0.39279307]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>Grüne begrüßen Stärkung der Rechte von homosex...</td>\n",
       "      <td>122014</td>\n",
       "      <td>Zum heute veröffentlichten Urteil des Europäis...</td>\n",
       "      <td>[0.46726323]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>181</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 title    date  \\\n",
       "220  Asylsuchende auf ihrem Weg in Arbeit unterstützen   52015   \n",
       "130             Trauerspiel auf dem Mittelmeer beenden   12019   \n",
       "134    Flüchtlingsschutz: Verantwortung gerecht werden   32014   \n",
       "44   Beschleunigte Asylverfahren: Einschränkungen s...  112014   \n",
       "152  Fluchtursachen verringern statt Flüchtlinge be...   72015   \n",
       "29    Aktuelle Stunde zur Flüchtlingspolitik beantragt  112015   \n",
       "108                Frieden schaffen statt Mauern bauen   82014   \n",
       "155  Arbeitsmarkt: Menschen mit Migrationshintergru...  102014   \n",
       "17   Weltflüchtlingstag: Solidarität mit schutzsuch...   62016   \n",
       "204  Bei Internationalen Flüchtlingsgipfeln für Wil...   92016   \n",
       "88   Keine Abkommen mit diktatorischen Staaten zula...  102016   \n",
       "158  Statements von Anton Hofreiter und Katrin Göri...   92015   \n",
       "14   Sofortmaßnahmen für syrische Flüchtlinge erfor...  122014   \n",
       "156  Statement Katrin Göring-Eckardt zu Flüchtlinge...   42015   \n",
       "200  Bundesregierung muss sich für Deeskalation in ...   22020   \n",
       "112   20 Jahre Srebrenica: Das Grauen mitten in Europa   72015   \n",
       "55   Bleiberecht: Zuckerbrot-und-Peitsche-Politik b...  122014   \n",
       "198              Einwanderungsgesetz ohne Einwanderung  112018   \n",
       "212               De Maizière handelt unverantwortlich   62016   \n",
       "169  Global Compact for Migration: Rückzug Österrei...  112018   \n",
       "216  Integrationsgipfel - Bekenntnis zum Einwanderu...   62018   \n",
       "188                      Demokratiefördergesetz jetzt!  102019   \n",
       "40   Start des Basiskontos: Asylsuchende werden imm...   62016   \n",
       "201  NSU-Mordserie: Von Konsequenzen ist zwei Jahre...  112013   \n",
       "61   Grüne begrüßen Stärkung der Rechte von homosex...  122014   \n",
       "\n",
       "                                               content         score  \\\n",
       "220  Zu der heute veröffentlichten Studie der Berte...  [0.38577597]   \n",
       "130  Zur heute vom UNHCR veröffentlichten Zahl von ...   [0.4216547]   \n",
       "134  Anlässlich der Rede von Bundespräsident Gauck ...  [0.41882924]   \n",
       "44   Zu den angekündigten Verfahrenserleichterungen...  [0.48179979]   \n",
       "152  Zu dem heute geplanten ARD Monitor-Bericht dem...  [0.40912572]   \n",
       "29   Wir haben heute eine Aktuelle Stunde zum Thema...  [0.50041898]   \n",
       "108  Anlässlich des 53. Jahrestages des Mauerbaus e...  [0.43647593]   \n",
       "155  Zu der heute vorgestellten IAB-Studie zur Arbe...   [0.4074729]   \n",
       "17   Zum Weltflüchtlingstag am 20.6.2016 erklärt Lu...   [0.5115617]   \n",
       "204  Zu den Flüchtlingsgipfeln der Vereinten Nation...  [0.39186595]   \n",
       "88   Zur flüchtlingspolitischen Debatte im heute be...  [0.44608472]   \n",
       "158  Auszüge aus dem TV- Statement von Anton Hofrei...  [0.40666575]   \n",
       "14   Zur Syrienkonferenz der UN-Flüchtlingswerks in...   [0.5189805]   \n",
       "156  Das Flüchtlingsdrama auf dem Mittelmeer weitet...  [0.40725253]   \n",
       "200  Zur Offensive der syrischen Armee auf Idlib er...  [0.39306209]   \n",
       "112  Zum Gedenken an den Völkermord in Srebrenica e...  [0.43578405]   \n",
       "55   Zu den bekanntgewordenen Inhalten eines überar...  [0.47286533]   \n",
       "198  Zum Referentenentwurf eines Fachkräfteeinwande...  [0.39341042]   \n",
       "212  Wir haben heute eine Aktuelle Stunde mit dem T...  [0.38946776]   \n",
       "169  Zum Rückzug Österreichs aus dem Global Compact...  [0.40262393]   \n",
       "216  Zum Integrationsgipfel erklärt Filiz Polat, Sp...   [0.3877078]   \n",
       "188  Zu den aktuellen Umstrukturierungen beim Bunde...  [0.39573753]   \n",
       "40   Zum Inkrafttreten des Zahlungskontengesetzes a...  [0.48630474]   \n",
       "201  Zum zweiten Jahrestag des Bekanntwerdens der N...  [0.39279307]   \n",
       "61   Zum heute veröffentlichten Urteil des Europäis...  [0.46726323]   \n",
       "\n",
       "       how_included  n_words  \n",
       "220  OVER-THRESHOLD      203  \n",
       "130  OVER-THRESHOLD      378  \n",
       "134  OVER-THRESHOLD      149  \n",
       "44   OVER-THRESHOLD      206  \n",
       "152  OVER-THRESHOLD      282  \n",
       "29   OVER-THRESHOLD      247  \n",
       "108  OVER-THRESHOLD      223  \n",
       "155  OVER-THRESHOLD      186  \n",
       "17   OVER-THRESHOLD      388  \n",
       "204  OVER-THRESHOLD      379  \n",
       "88   OVER-THRESHOLD      370  \n",
       "158  OVER-THRESHOLD     1095  \n",
       "14   OVER-THRESHOLD      325  \n",
       "156  OVER-THRESHOLD      502  \n",
       "200  OVER-THRESHOLD      256  \n",
       "112  OVER-THRESHOLD      422  \n",
       "55   OVER-THRESHOLD      237  \n",
       "198  OVER-THRESHOLD      314  \n",
       "212  OVER-THRESHOLD      166  \n",
       "169  OVER-THRESHOLD      343  \n",
       "216  OVER-THRESHOLD      243  \n",
       "188  OVER-THRESHOLD      323  \n",
       "40   OVER-THRESHOLD      131  \n",
       "201  OVER-THRESHOLD      235  \n",
       "61   OVER-THRESHOLD      181  "
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw 25 rows for inspection. The fixed random_state makes the draw repeatable\n",
    "# across kernel restarts; without it every run selects a different set of rows.\n",
    "greens_rel_sample = greens_rel.sample(25, random_state=42)\n",
    "greens_rel_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['062014',\n",
       " '022014',\n",
       " '062014',\n",
       " '052018',\n",
       " '102018',\n",
       " '032014',\n",
       " '062015',\n",
       " '032016',\n",
       " '022015',\n",
       " '042016',\n",
       " '122015',\n",
       " '112015',\n",
       " '052016',\n",
       " '102015',\n",
       " '122014',\n",
       " '082017',\n",
       " '122014',\n",
       " '062016',\n",
       " '072015',\n",
       " '032015',\n",
       " '022016',\n",
       " '032016',\n",
       " '112015',\n",
       " '072016',\n",
       " '052014',\n",
       " '112015',\n",
       " '092014',\n",
       " '012015',\n",
       " '012014',\n",
       " '112015',\n",
       " '032017',\n",
       " '032014',\n",
       " '112016',\n",
       " '062014',\n",
       " '072015',\n",
       " '022016',\n",
       " '092014',\n",
       " '122019',\n",
       " '022017',\n",
       " '042015',\n",
       " '062016',\n",
       " '052018',\n",
       " '032014',\n",
       " '062015',\n",
       " '112014',\n",
       " '042016',\n",
       " '032017',\n",
       " '022018',\n",
       " '022016',\n",
       " '122016',\n",
       " '072014',\n",
       " '092015',\n",
       " '012019',\n",
       " '052014',\n",
       " '042014',\n",
       " '122014',\n",
       " '092015',\n",
       " '062017',\n",
       " '102016',\n",
       " '032018',\n",
       " '082018',\n",
       " '122014',\n",
       " '102014',\n",
       " '042014',\n",
       " '082016',\n",
       " '022014',\n",
       " '122019',\n",
       " '012014',\n",
       " '092015',\n",
       " '112015',\n",
       " '072016',\n",
       " '082015',\n",
       " '022016',\n",
       " '092016',\n",
       " '062019',\n",
       " '072015',\n",
       " '082018',\n",
       " '092016',\n",
       " '042016',\n",
       " '102018',\n",
       " '032019',\n",
       " '122019',\n",
       " '072015',\n",
       " '082019',\n",
       " '102015',\n",
       " '092018',\n",
       " '072018',\n",
       " '122015',\n",
       " '102016',\n",
       " '032018',\n",
       " '042015',\n",
       " '122015',\n",
       " '092015',\n",
       " '012016',\n",
       " '102015',\n",
       " '062018',\n",
       " '112015',\n",
       " '072015',\n",
       " '122016',\n",
       " '032015',\n",
       " '082014',\n",
       " '052014',\n",
       " '052018',\n",
       " '092016',\n",
       " '082015',\n",
       " '052016',\n",
       " '042015',\n",
       " '122014',\n",
       " '082014',\n",
       " '072015',\n",
       " '082017',\n",
       " '062014',\n",
       " '072015',\n",
       " '082018',\n",
       " '022020',\n",
       " '112015',\n",
       " '122016',\n",
       " '072014',\n",
       " '092019',\n",
       " '122014',\n",
       " '042015',\n",
       " '052017',\n",
       " '122016',\n",
       " '122015',\n",
       " '052015',\n",
       " '022014',\n",
       " '032020',\n",
       " '062017',\n",
       " '122016',\n",
       " '102015',\n",
       " '012019',\n",
       " '052015',\n",
       " '072014',\n",
       " '122016',\n",
       " '032014',\n",
       " '112014',\n",
       " '032015',\n",
       " '092016',\n",
       " '012016',\n",
       " '102015',\n",
       " '032014',\n",
       " '082015',\n",
       " '022017',\n",
       " '112015',\n",
       " '122014',\n",
       " '052016',\n",
       " '012014',\n",
       " '082019',\n",
       " '042019',\n",
       " '042016',\n",
       " '122016',\n",
       " '052017',\n",
       " '072015',\n",
       " '022015',\n",
       " '072017',\n",
       " '102014',\n",
       " '042015',\n",
       " '082018',\n",
       " '092015',\n",
       " '112014',\n",
       " '122015',\n",
       " '122014',\n",
       " '032017',\n",
       " '062015',\n",
       " '022014',\n",
       " '052016',\n",
       " '022014',\n",
       " '042019',\n",
       " '052015',\n",
       " '112018',\n",
       " '122019',\n",
       " '022020',\n",
       " '102015',\n",
       " '052019',\n",
       " '102019',\n",
       " '032018',\n",
       " '122018',\n",
       " '012017',\n",
       " '092017',\n",
       " '012016',\n",
       " '122016',\n",
       " '082014',\n",
       " '082014',\n",
       " '072016',\n",
       " '032017',\n",
       " '102015',\n",
       " '052019',\n",
       " '072019',\n",
       " '102019',\n",
       " '112016',\n",
       " '052015',\n",
       " '112015',\n",
       " '012015',\n",
       " '112016',\n",
       " '082014',\n",
       " '122019',\n",
       " '072016',\n",
       " '082015',\n",
       " '112018',\n",
       " '042018',\n",
       " '022020',\n",
       " '112013',\n",
       " '062014',\n",
       " '022014',\n",
       " '092016',\n",
       " '042018',\n",
       " '012019',\n",
       " '022016',\n",
       " '042017',\n",
       " '102015',\n",
       " '032017',\n",
       " '092015',\n",
       " '062016',\n",
       " '122016',\n",
       " '122014',\n",
       " '102014',\n",
       " '062018',\n",
       " '122018',\n",
       " '112016',\n",
       " '122015',\n",
       " '052015',\n",
       " '012017',\n",
       " '112018',\n",
       " '062016',\n",
       " '092018',\n",
       " '022020']"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Prepend each row's title to its content so both end up in one text field.\n",
    "# The separator is a plain string: a one-element list only works through length-1\n",
    "# array broadcasting on object-dtype columns, while a scalar always broadcasts.\n",
    "# Note: this overwrites 'content', so re-running the cell prepends the title again.\n",
    "greens_rel[\"content\"] = greens_rel[\"title\"] + \" \" + greens_rel[\"content\"]\n",
    "\n",
    "# Keys such as 62014 lack the leading zero for months 1-9; left-pad every key with\n",
    "# zeros to six characters (62014 -> '062014') so all keys share one format.\n",
    "greens_rel['date'] = greens_rel['date'].apply(lambda x: '{0:0>6}'.format(x))\n",
    "\n",
    "greens_rel['date'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['012014',\n",
       " '012015',\n",
       " '012016',\n",
       " '012017',\n",
       " '012019',\n",
       " '022014',\n",
       " '022015',\n",
       " '022016',\n",
       " '022017',\n",
       " '022018',\n",
       " '022020',\n",
       " '032014',\n",
       " '032015',\n",
       " '032016',\n",
       " '032017',\n",
       " '032018',\n",
       " '032019',\n",
       " '032020',\n",
       " '042014',\n",
       " '042015',\n",
       " '042016',\n",
       " '042017',\n",
       " '042018',\n",
       " '042019',\n",
       " '052014',\n",
       " '052015',\n",
       " '052016',\n",
       " '052017',\n",
       " '052018',\n",
       " '052019',\n",
       " '062014',\n",
       " '062015',\n",
       " '062016',\n",
       " '062017',\n",
       " '062018',\n",
       " '062019',\n",
       " '072014',\n",
       " '072015',\n",
       " '072016',\n",
       " '072017',\n",
       " '072018',\n",
       " '072019',\n",
       " '082014',\n",
       " '082015',\n",
       " '082016',\n",
       " '082017',\n",
       " '082018',\n",
       " '082019',\n",
       " '092014',\n",
       " '092015',\n",
       " '092016',\n",
       " '092017',\n",
       " '092018',\n",
       " '092019',\n",
       " '102014',\n",
       " '102015',\n",
       " '102016',\n",
       " '102018',\n",
       " '102019',\n",
       " '112013',\n",
       " '112014',\n",
       " '112015',\n",
       " '112016',\n",
       " '112018',\n",
       " '122014',\n",
       " '122015',\n",
       " '122016',\n",
       " '122018',\n",
       " '122019']"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collapse the frame to one row per 'date' value; the texts of each group are\n",
    "# concatenated with commas and the group key becomes a regular column again.\n",
    "greens_rel = (\n",
    "    greens_rel\n",
    "    .groupby(['date'])['content']\n",
    "    .apply(','.join)\n",
    "    .reset_index()\n",
    ")\n",
    "greens_rel['date'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Kinderrechte: Bundesregierung muss vor UN-Auss...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Ausbildungsgarantie für junge Flüchtlinge Zum ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Asylpaket: Aussetzung von Familiennachzug ist ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Arbeitsmarktzahlen: 2017 muss das Jahr der Arb...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Deutschland muss bei Menschenrechten Haltung u...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>122014</td>\n",
       "      <td>Sofortmaßnahmen für syrische Flüchtlinge erfor...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>122015</td>\n",
       "      <td>Amadeu Antonio - Erinnerung als Auftrag Zum mo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66</th>\n",
       "      <td>122016</td>\n",
       "      <td>Aktuelle Stunde zum Doppelpass: Hier geboren, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>122018</td>\n",
       "      <td>Ein Meilenstein für Migration und Multilateral...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>122019</td>\n",
       "      <td>Menschenrechtsverteidigerinnen und -verteidige...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>69 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      date                                            content\n",
       "0   012014  Kinderrechte: Bundesregierung muss vor UN-Auss...\n",
       "1   012015  Ausbildungsgarantie für junge Flüchtlinge Zum ...\n",
       "2   012016  Asylpaket: Aussetzung von Familiennachzug ist ...\n",
       "3   012017  Arbeitsmarktzahlen: 2017 muss das Jahr der Arb...\n",
       "4   012019  Deutschland muss bei Menschenrechten Haltung u...\n",
       "..     ...                                                ...\n",
       "64  122014  Sofortmaßnahmen für syrische Flüchtlinge erfor...\n",
       "65  122015  Amadeu Antonio - Erinnerung als Auftrag Zum mo...\n",
       "66  122016  Aktuelle Stunde zum Doppelpass: Hier geboren, ...\n",
       "67  122018  Ein Meilenstein für Migration und Multilateral...\n",
       "68  122019  Menschenrechtsverteidigerinnen und -verteidige...\n",
       "\n",
       "[69 rows x 2 columns]"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the aggregated Greens frame (one row per 'date' value, texts comma-joined in 'content').\n",
    "greens_rel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the shared NLP pipeline over the text of every aggregated Greens row.\n",
    "greens_rel['nlpprocessed'] = greens_rel['content'].apply(nlp_pipeline)\n",
    "# Plain alias (no copy): every column added below is also added to greens_rel.\n",
    "greens_rel_final = greens_rel\n",
    "\n",
    "# Turn each processed item sequence into a single comma-separated string.\n",
    "greens_rel_final['liststring'] = [\n",
    "    ','.join(str(item) for item in processed)\n",
    "    for processed in greens_rel_final['nlpprocessed']\n",
    "]\n",
    "\n",
    "# The lexicon entries are OR-ed into one regular expression, so str.count returns\n",
    "# the number of non-overlapping regex matches found in each string.\n",
    "positive_pattern = '|'.join(positive_list1)\n",
    "negative_pattern = '|'.join(negative_list1)\n",
    "greens_rel_final['positive words'] = greens_rel_final['liststring'].str.count(positive_pattern)\n",
    "greens_rel_final['negative words'] = greens_rel_final['liststring'].str.count(negative_pattern)\n",
    "\n",
    "# Label each row by comparing the two counts; equal counts are labelled 'neutral'.\n",
    "positive_hits = greens_rel_final['positive words']\n",
    "negative_hits = greens_rel_final['negative words']\n",
    "conditions = [\n",
    "    positive_hits.gt(negative_hits),\n",
    "    negative_hits.gt(positive_hits),\n",
    "    negative_hits.eq(positive_hits),\n",
    "]\n",
    "choices = ['positive', 'negative', 'neutral']\n",
    "greens_rel_final['overall'] = np.select(conditions, choices, default='')\n",
    "\n",
    "# Drop the intermediate column, rename the columns for export and write the CSV.\n",
    "del greens_rel_final['nlpprocessed']\n",
    "export_names = {\n",
    "    'content': 'original_title_text',\n",
    "    'liststring': 'text_procssed_text',\n",
    "    'date': 'month_year',\n",
    "    'overall': 'overall_sentiment',\n",
    "}\n",
    "greens_rel_final = greens_rel_final.rename(columns=export_names)\n",
    "greens_rel_final.to_csv('/Users/ashrakatelshehawy/Downloads/greens_sentiment.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# linke"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Tunesien, Algerien und Marokko sind keine sich...</td>\n",
       "      <td>32017</td>\n",
       "      <td>„Es ist absurd, die Maghreb-Staaten Tunesien, ...</td>\n",
       "      <td>[0.62473891]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>141</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Diskriminierungs- und barrierefreien Zugang zu...</td>\n",
       "      <td>112018</td>\n",
       "      <td>„Die Übernahme der Kosten für die HIV-Prophyla...</td>\n",
       "      <td>[0.60934525]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Opfer von Menschenhandel europaweit schützen</td>\n",
       "      <td>102019</td>\n",
       "      <td>Anlässlich des Europäischen Tages gegen Mensch...</td>\n",
       "      <td>[0.60774909]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zehn Jahre Zuwanderungsgesetz sind kein Grund ...</td>\n",
       "      <td>42015</td>\n",
       "      <td>„Zuwanderungsgesetz hin oder her: Noch immer w...</td>\n",
       "      <td>[0.59632717]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Roma vor Diskriminierung und Rassismus schützen</td>\n",
       "      <td>42014</td>\n",
       "      <td>Zum Schutz der Roma vor Diskriminierung und Ra...</td>\n",
       "      <td>[0.57479433]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>242</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title    date  \\\n",
       "0  Tunesien, Algerien und Marokko sind keine sich...   32017   \n",
       "1  Diskriminierungs- und barrierefreien Zugang zu...  112018   \n",
       "2       Opfer von Menschenhandel europaweit schützen  102019   \n",
       "3  Zehn Jahre Zuwanderungsgesetz sind kein Grund ...   42015   \n",
       "4    Roma vor Diskriminierung und Rassismus schützen   42014   \n",
       "\n",
       "                                             content         score  \\\n",
       "0  „Es ist absurd, die Maghreb-Staaten Tunesien, ...  [0.62473891]   \n",
       "1  „Die Übernahme der Kosten für die HIV-Prophyla...  [0.60934525]   \n",
       "2  Anlässlich des Europäischen Tages gegen Mensch...  [0.60774909]   \n",
       "3  „Zuwanderungsgesetz hin oder her: Noch immer w...  [0.59632717]   \n",
       "4  Zum Schutz der Roma vor Diskriminierung und Ra...  [0.57479433]   \n",
       "\n",
       "     how_included  n_words  \n",
       "0  OVER-THRESHOLD      141  \n",
       "1  OVER-THRESHOLD      228  \n",
       "2  OVER-THRESHOLD      208  \n",
       "3  OVER-THRESHOLD      268  \n",
       "4  OVER-THRESHOLD      242  "
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the refugee-relevant Linke documents from a tab-separated UTF-8 file\n",
    "# that is read without a header row.\n",
    "linke_path = '/Users/ashrakatelshehawy/linke_refugeerelevant.csv'\n",
    "linke_read_options = dict(header=None, encoding='utf-8', delimiter='\\t')\n",
    "try:\n",
    "    # pandas 1.3 and later: 'warn' skips malformed rows and reports them, which is what\n",
    "    # error_bad_lines=False did before that keyword was deprecated (1.3) and removed (2.0).\n",
    "    linke_rel = pd.read_csv(linke_path, on_bad_lines='warn', **linke_read_options)\n",
    "except TypeError:\n",
    "    # pandas older than 1.3 rejects the unknown on_bad_lines keyword; use the legacy spelling.\n",
    "    linke_rel = pd.read_csv(linke_path, error_bad_lines=False, **linke_read_options)\n",
    "\n",
    "# No header row was read, so name the six columns explicitly.\n",
    "linke_rel.columns = ['title', 'date', 'content', 'score', 'how_included', 'n_words']\n",
    "linke_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>218</th>\n",
       "      <td>EU diskutiert sich in eine humanitäre Katastro...</td>\n",
       "      <td>62015</td>\n",
       "      <td>Während die EU-Innenminister in Luxemburg erge...</td>\n",
       "      <td>[0.43742313]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>206</th>\n",
       "      <td>Humanitäre Katastrophe im Mittelmeer sofort be...</td>\n",
       "      <td>72019</td>\n",
       "      <td>„Die Bundesregierung darf sich mit den Solidar...</td>\n",
       "      <td>[0.43991396]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>153</th>\n",
       "      <td>Unmenschliche Abschottung an der türkisch-grie...</td>\n",
       "      <td>32020</td>\n",
       "      <td>„Ich bin fassungslos und erschüttert über die ...</td>\n",
       "      <td>[0.46231399]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>260</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158</th>\n",
       "      <td>UN-Bericht bestätigt: Abschiebungen nach Afgha...</td>\n",
       "      <td>102018</td>\n",
       "      <td>„Der Bericht der UN-Mission in Afghanistan bew...</td>\n",
       "      <td>[0.46058295]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>Integrationsangebote statt Integrationsverpfli...</td>\n",
       "      <td>112015</td>\n",
       "      <td>Wenn das zweite Asylpaket wie von der Union ge...</td>\n",
       "      <td>[0.41258011]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>424</th>\n",
       "      <td>Hasstiraden und rassistischem Müll entgegentreten</td>\n",
       "      <td>92015</td>\n",
       "      <td>„Hass- und Gewaltposts widersprechen den Regel...</td>\n",
       "      <td>[0.39590564]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>EU-Militäreinsatz EUNAVFOR MED macht Fluchtweg...</td>\n",
       "      <td>102015</td>\n",
       "      <td>Statt Fluchtursachen zu bekämpfen, schottet si...</td>\n",
       "      <td>[0.44752676]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>297</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>Abschiebungen in den Krieg sofort stoppen</td>\n",
       "      <td>42017</td>\n",
       "      <td>„Die nunmehr fünfte Sammelabschiebung von Flüc...</td>\n",
       "      <td>[0.49936129]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>154</th>\n",
       "      <td>Integration ist eine soziale, keine religiöse ...</td>\n",
       "      <td>12014</td>\n",
       "      <td>Die deutsche Islamkonferenz war bisher nicht d...</td>\n",
       "      <td>[0.46178225]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>433</th>\n",
       "      <td>Residenzpflicht ersatzlos streichen</td>\n",
       "      <td>102014</td>\n",
       "      <td>Mit dem jetzt beschlossenen Gesetzentwurf geht...</td>\n",
       "      <td>[0.3949221]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222</th>\n",
       "      <td>Zahlen strafen brandgefährliche Hetze aus Unio...</td>\n",
       "      <td>72018</td>\n",
       "      <td>„Die aktuellen Zahlen zum Familiennachzug stra...</td>\n",
       "      <td>[0.43690899]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>207</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>313</th>\n",
       "      <td>LINKE lehnt Nützlichkeitsrassismus ab</td>\n",
       "      <td>92016</td>\n",
       "      <td>„Die Forderung nach einem Einwanderungsgesetz ...</td>\n",
       "      <td>[0.41777477]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115</th>\n",
       "      <td>EU-Gipfel der faulen Kompromisse</td>\n",
       "      <td>62015</td>\n",
       "      <td>Gerade jetzt, wo gemeinsames und beherztes Han...</td>\n",
       "      <td>[0.4730575]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>464</th>\n",
       "      <td>Spitzelbehörde DITIB kann kein Partner sein</td>\n",
       "      <td>32017</td>\n",
       "      <td>„Je mehr die Bundesregierung Integration von e...</td>\n",
       "      <td>[0.38789832]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223</th>\n",
       "      <td>Konsequenzen aus der Verhaftung des Kooperatio...</td>\n",
       "      <td>112019</td>\n",
       "      <td>„Die Bundesregierung muss jetzt Druck auf das ...</td>\n",
       "      <td>[0.43685322]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>406</th>\n",
       "      <td>Integration und Schutz statt Abschreckung und ...</td>\n",
       "      <td>92017</td>\n",
       "      <td>„Statt populistischer Hetze und dem Ruf nach m...</td>\n",
       "      <td>[0.3997295]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>112</th>\n",
       "      <td>Merkel surft auf der rechtspopulistischen Welle</td>\n",
       "      <td>52014</td>\n",
       "      <td>Mit ihrem Gerede über angeblichen Missbrauch v...</td>\n",
       "      <td>[0.47574534]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>207</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366</th>\n",
       "      <td>Ungarn-Referendum: Orbán gescheitert, aber län...</td>\n",
       "      <td>102016</td>\n",
       "      <td>„Ich bin froh, dass sich die Mehrheit der unga...</td>\n",
       "      <td>[0.40721506]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>241</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>177</th>\n",
       "      <td>Terrororganisation Islamischer Staat in Deutsc...</td>\n",
       "      <td>82014</td>\n",
       "      <td>Die Terrororganisation Islamischer Staat (IS) ...</td>\n",
       "      <td>[0.44949707]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>407</th>\n",
       "      <td>Deutschland braucht ein Flüchtlingsaufnahmegesetz</td>\n",
       "      <td>62015</td>\n",
       "      <td>Der Handlungsbedarf für den Bund ist groß: Die...</td>\n",
       "      <td>[0.39880759]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>190</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174</th>\n",
       "      <td>Der Frauentag ist auch ein Tag gegen Gewalt</td>\n",
       "      <td>32020</td>\n",
       "      <td>Am 8. März gehen Frauen weltweit auf die Straß...</td>\n",
       "      <td>[0.4500444]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>Strobl betreibt AfD-Hetze</td>\n",
       "      <td>112016</td>\n",
       "      <td>„Strobl betreibt blanke AfD-Werbung, indem er ...</td>\n",
       "      <td>[0.4675376]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>329</th>\n",
       "      <td>UN-Gipfel in Istanbul hofiert den Autokraten E...</td>\n",
       "      <td>52016</td>\n",
       "      <td>„Angesichts des Krieges des türkischen Erdogan...</td>\n",
       "      <td>[0.41447118]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>242</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>302</th>\n",
       "      <td>Mehrstaatigkeit grundsätzlich akzeptieren</td>\n",
       "      <td>122014</td>\n",
       "      <td>Ein wirkliches Weihnachtsgeschenk wäre der gru...</td>\n",
       "      <td>[0.42017016]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>361</th>\n",
       "      <td>Kanzlerin Merkel ist maßgeblich für humanitäre...</td>\n",
       "      <td>32016</td>\n",
       "      <td>„Die Entscheidung der Hilfsorganisationen, ih...</td>\n",
       "      <td>[0.40802826]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>231</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 title    date  \\\n",
       "218  EU diskutiert sich in eine humanitäre Katastro...   62015   \n",
       "206  Humanitäre Katastrophe im Mittelmeer sofort be...   72019   \n",
       "153  Unmenschliche Abschottung an der türkisch-grie...   32020   \n",
       "158  UN-Bericht bestätigt: Abschiebungen nach Afgha...  102018   \n",
       "334  Integrationsangebote statt Integrationsverpfli...  112015   \n",
       "424  Hasstiraden und rassistischem Müll entgegentreten   92015   \n",
       "183  EU-Militäreinsatz EUNAVFOR MED macht Fluchtweg...  102015   \n",
       "60           Abschiebungen in den Krieg sofort stoppen   42017   \n",
       "154  Integration ist eine soziale, keine religiöse ...   12014   \n",
       "433                Residenzpflicht ersatzlos streichen  102014   \n",
       "222  Zahlen strafen brandgefährliche Hetze aus Unio...   72018   \n",
       "313              LINKE lehnt Nützlichkeitsrassismus ab   92016   \n",
       "115                  EU-Gipfel der faulen Kompromisse    62015   \n",
       "464        Spitzelbehörde DITIB kann kein Partner sein   32017   \n",
       "223  Konsequenzen aus der Verhaftung des Kooperatio...  112019   \n",
       "406  Integration und Schutz statt Abschreckung und ...   92017   \n",
       "112    Merkel surft auf der rechtspopulistischen Welle   52014   \n",
       "366  Ungarn-Referendum: Orbán gescheitert, aber län...  102016   \n",
       "177  Terrororganisation Islamischer Staat in Deutsc...   82014   \n",
       "407  Deutschland braucht ein Flüchtlingsaufnahmegesetz   62015   \n",
       "174        Der Frauentag ist auch ein Tag gegen Gewalt   32020   \n",
       "135                          Strobl betreibt AfD-Hetze  112016   \n",
       "329  UN-Gipfel in Istanbul hofiert den Autokraten E...   52016   \n",
       "302          Mehrstaatigkeit grundsätzlich akzeptieren  122014   \n",
       "361  Kanzlerin Merkel ist maßgeblich für humanitäre...   32016   \n",
       "\n",
       "                                               content         score  \\\n",
       "218  Während die EU-Innenminister in Luxemburg erge...  [0.43742313]   \n",
       "206  „Die Bundesregierung darf sich mit den Solidar...  [0.43991396]   \n",
       "153  „Ich bin fassungslos und erschüttert über die ...  [0.46231399]   \n",
       "158  „Der Bericht der UN-Mission in Afghanistan bew...  [0.46058295]   \n",
       "334  Wenn das zweite Asylpaket wie von der Union ge...  [0.41258011]   \n",
       "424  „Hass- und Gewaltposts widersprechen den Regel...  [0.39590564]   \n",
       "183  Statt Fluchtursachen zu bekämpfen, schottet si...  [0.44752676]   \n",
       "60   „Die nunmehr fünfte Sammelabschiebung von Flüc...  [0.49936129]   \n",
       "154  Die deutsche Islamkonferenz war bisher nicht d...  [0.46178225]   \n",
       "433  Mit dem jetzt beschlossenen Gesetzentwurf geht...   [0.3949221]   \n",
       "222  „Die aktuellen Zahlen zum Familiennachzug stra...  [0.43690899]   \n",
       "313  „Die Forderung nach einem Einwanderungsgesetz ...  [0.41777477]   \n",
       "115  Gerade jetzt, wo gemeinsames und beherztes Han...   [0.4730575]   \n",
       "464  „Je mehr die Bundesregierung Integration von e...  [0.38789832]   \n",
       "223  „Die Bundesregierung muss jetzt Druck auf das ...  [0.43685322]   \n",
       "406  „Statt populistischer Hetze und dem Ruf nach m...   [0.3997295]   \n",
       "112  Mit ihrem Gerede über angeblichen Missbrauch v...  [0.47574534]   \n",
       "366  „Ich bin froh, dass sich die Mehrheit der unga...  [0.40721506]   \n",
       "177  Die Terrororganisation Islamischer Staat (IS) ...  [0.44949707]   \n",
       "407  Der Handlungsbedarf für den Bund ist groß: Die...  [0.39880759]   \n",
       "174  Am 8. März gehen Frauen weltweit auf die Straß...   [0.4500444]   \n",
       "135  „Strobl betreibt blanke AfD-Werbung, indem er ...   [0.4675376]   \n",
       "329  „Angesichts des Krieges des türkischen Erdogan...  [0.41447118]   \n",
       "302  Ein wirkliches Weihnachtsgeschenk wäre der gru...  [0.42017016]   \n",
       "361   „Die Entscheidung der Hilfsorganisationen, ih...  [0.40802826]   \n",
       "\n",
       "       how_included  n_words  \n",
       "218  OVER-THRESHOLD      189  \n",
       "206  OVER-THRESHOLD      183  \n",
       "153  OVER-THRESHOLD      260  \n",
       "158  OVER-THRESHOLD      170  \n",
       "334  OVER-THRESHOLD      219  \n",
       "424  OVER-THRESHOLD      247  \n",
       "183  OVER-THRESHOLD      297  \n",
       "60   OVER-THRESHOLD      179  \n",
       "154  OVER-THRESHOLD      231  \n",
       "433  OVER-THRESHOLD      197  \n",
       "222  OVER-THRESHOLD      207  \n",
       "313  OVER-THRESHOLD      168  \n",
       "115  OVER-THRESHOLD      195  \n",
       "464  OVER-THRESHOLD      183  \n",
       "223  OVER-THRESHOLD      184  \n",
       "406  OVER-THRESHOLD      170  \n",
       "112  OVER-THRESHOLD      207  \n",
       "366  OVER-THRESHOLD      241  \n",
       "177  OVER-THRESHOLD      234  \n",
       "407  OVER-THRESHOLD      190  \n",
       "174  OVER-THRESHOLD      360  \n",
       "135  OVER-THRESHOLD      142  \n",
       "329  OVER-THRESHOLD      242  \n",
       "302  OVER-THRESHOLD      140  \n",
       "361  OVER-THRESHOLD      231  "
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Draw 25 random rows and display them. The fixed seed makes the draw repeatable,\n",
    "# so Restart & Run All shows the same rows every time.\n",
    "LINKE_SAMPLE_SEED = 42\n",
    "linke_rel_sample = linke_rel.sample(25, random_state=LINKE_SAMPLE_SEED)\n",
    "linke_rel_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Tunesien, Algerien und Marokko sind keine sich...</td>\n",
       "      <td>032017</td>\n",
       "      <td>Tunesien, Algerien und Marokko sind keine sich...</td>\n",
       "      <td>[0.62473891]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>141</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Diskriminierungs- und barrierefreien Zugang zu...</td>\n",
       "      <td>112018</td>\n",
       "      <td>Diskriminierungs- und barrierefreien Zugang zu...</td>\n",
       "      <td>[0.60934525]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Opfer von Menschenhandel europaweit schützen</td>\n",
       "      <td>102019</td>\n",
       "      <td>Opfer von Menschenhandel europaweit schützen A...</td>\n",
       "      <td>[0.60774909]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Zehn Jahre Zuwanderungsgesetz sind kein Grund ...</td>\n",
       "      <td>042015</td>\n",
       "      <td>Zehn Jahre Zuwanderungsgesetz sind kein Grund ...</td>\n",
       "      <td>[0.59632717]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Roma vor Diskriminierung und Rassismus schützen</td>\n",
       "      <td>042014</td>\n",
       "      <td>Roma vor Diskriminierung und Rassismus schütze...</td>\n",
       "      <td>[0.57479433]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>242</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>473</th>\n",
       "      <td>Menschen müssen sich wieder sicher fühlen können</td>\n",
       "      <td>072016</td>\n",
       "      <td>Menschen müssen sich wieder sicher fühlen könn...</td>\n",
       "      <td>[0.38527964]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>474</th>\n",
       "      <td>Rückkehrprogramm ist asylfeindlich</td>\n",
       "      <td>022017</td>\n",
       "      <td>Rückkehrprogramm ist asylfeindlich „Mit dem ne...</td>\n",
       "      <td>[0.38525823]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>475</th>\n",
       "      <td>Bremer Asylprüfungen sind Ausdruck flüchtlings...</td>\n",
       "      <td>052018</td>\n",
       "      <td>Bremer Asylprüfungen sind Ausdruck flüchtlings...</td>\n",
       "      <td>[0.37490641]</td>\n",
       "      <td>title</td>\n",
       "      <td>222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>476</th>\n",
       "      <td>Der flüchtlingsfeindliche Konsens zwischen Mer...</td>\n",
       "      <td>072018</td>\n",
       "      <td>Der flüchtlingsfeindliche Konsens zwischen Mer...</td>\n",
       "      <td>[0.34567737]</td>\n",
       "      <td>title</td>\n",
       "      <td>174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>477</th>\n",
       "      <td>Fachkräfteeinwanderung löst Probleme auf dem A...</td>\n",
       "      <td>122018</td>\n",
       "      <td>Fachkräfteeinwanderung löst Probleme auf dem A...</td>\n",
       "      <td>[0.29011488]</td>\n",
       "      <td>title</td>\n",
       "      <td>226</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>478 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 title    date  \\\n",
       "0    Tunesien, Algerien und Marokko sind keine sich...  032017   \n",
       "1    Diskriminierungs- und barrierefreien Zugang zu...  112018   \n",
       "2         Opfer von Menschenhandel europaweit schützen  102019   \n",
       "3    Zehn Jahre Zuwanderungsgesetz sind kein Grund ...  042015   \n",
       "4      Roma vor Diskriminierung und Rassismus schützen  042014   \n",
       "..                                                 ...     ...   \n",
       "473   Menschen müssen sich wieder sicher fühlen können  072016   \n",
       "474                 Rückkehrprogramm ist asylfeindlich  022017   \n",
       "475  Bremer Asylprüfungen sind Ausdruck flüchtlings...  052018   \n",
       "476  Der flüchtlingsfeindliche Konsens zwischen Mer...  072018   \n",
       "477  Fachkräfteeinwanderung löst Probleme auf dem A...  122018   \n",
       "\n",
       "                                               content         score  \\\n",
       "0    Tunesien, Algerien und Marokko sind keine sich...  [0.62473891]   \n",
       "1    Diskriminierungs- und barrierefreien Zugang zu...  [0.60934525]   \n",
       "2    Opfer von Menschenhandel europaweit schützen A...  [0.60774909]   \n",
       "3    Zehn Jahre Zuwanderungsgesetz sind kein Grund ...  [0.59632717]   \n",
       "4    Roma vor Diskriminierung und Rassismus schütze...  [0.57479433]   \n",
       "..                                                 ...           ...   \n",
       "473  Menschen müssen sich wieder sicher fühlen könn...  [0.38527964]   \n",
       "474  Rückkehrprogramm ist asylfeindlich „Mit dem ne...  [0.38525823]   \n",
       "475  Bremer Asylprüfungen sind Ausdruck flüchtlings...  [0.37490641]   \n",
       "476  Der flüchtlingsfeindliche Konsens zwischen Mer...  [0.34567737]   \n",
       "477  Fachkräfteeinwanderung löst Probleme auf dem A...  [0.29011488]   \n",
       "\n",
       "       how_included  n_words  \n",
       "0    OVER-THRESHOLD      141  \n",
       "1    OVER-THRESHOLD      228  \n",
       "2    OVER-THRESHOLD      208  \n",
       "3    OVER-THRESHOLD      268  \n",
       "4    OVER-THRESHOLD      242  \n",
       "..              ...      ...  \n",
       "473  OVER-THRESHOLD      153  \n",
       "474  OVER-THRESHOLD      224  \n",
       "475           title      222  \n",
       "476           title      174  \n",
       "477           title      226  \n",
       "\n",
       "[478 rows x 6 columns]"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "linke_rel[\"content\"] = linke_rel[\"title\"] +[\" \"]+ linke_rel[\"content\"] \n",
    "linke_rel['date'] = linke_rel['date'].apply(lambda x: '{0:0>6}'.format(x))\n",
    "\n",
    "linke_rel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['032017',\n",
       " '112018',\n",
       " '102019',\n",
       " '042015',\n",
       " '042014',\n",
       " '062014',\n",
       " '112016',\n",
       " '042018',\n",
       " '122014',\n",
       " '102018',\n",
       " '072016',\n",
       " '032016',\n",
       " '042015',\n",
       " '012018',\n",
       " '012014',\n",
       " '032016',\n",
       " '072015',\n",
       " '042015',\n",
       " '022016',\n",
       " '012017',\n",
       " '042014',\n",
       " '102019',\n",
       " '092014',\n",
       " '072014',\n",
       " '122014',\n",
       " '072015',\n",
       " '052016',\n",
       " '092015',\n",
       " '112015',\n",
       " '102015',\n",
       " '022018',\n",
       " '012014',\n",
       " '042016',\n",
       " '022015',\n",
       " '092015',\n",
       " '052014',\n",
       " '052016',\n",
       " '012020',\n",
       " '032018',\n",
       " '112019',\n",
       " '022018',\n",
       " '082014',\n",
       " '072017',\n",
       " '122015',\n",
       " '052017',\n",
       " '112017',\n",
       " '072015',\n",
       " '092015',\n",
       " '062016',\n",
       " '092015',\n",
       " '032014',\n",
       " '052015',\n",
       " '032017',\n",
       " '032015',\n",
       " '012020',\n",
       " '032016',\n",
       " '042018',\n",
       " '032014',\n",
       " '042015',\n",
       " '102016',\n",
       " '042017',\n",
       " '012014',\n",
       " '052015',\n",
       " '032014',\n",
       " '062017',\n",
       " '122019',\n",
       " '022016',\n",
       " '082018',\n",
       " '052014',\n",
       " '062019',\n",
       " '012014',\n",
       " '052016',\n",
       " '092015',\n",
       " '082015',\n",
       " '072015',\n",
       " '112015',\n",
       " '072014',\n",
       " '072019',\n",
       " '052017',\n",
       " '052018',\n",
       " '012014',\n",
       " '112014',\n",
       " '062014',\n",
       " '012016',\n",
       " '062015',\n",
       " '072016',\n",
       " '112016',\n",
       " '052016',\n",
       " '112016',\n",
       " '082019',\n",
       " '042019',\n",
       " '042017',\n",
       " '012016',\n",
       " '112017',\n",
       " '112015',\n",
       " '032016',\n",
       " '032014',\n",
       " '102014',\n",
       " '022016',\n",
       " '032020',\n",
       " '112015',\n",
       " '042016',\n",
       " '032017',\n",
       " '022014',\n",
       " '072015',\n",
       " '092017',\n",
       " '092015',\n",
       " '032015',\n",
       " '072016',\n",
       " '102014',\n",
       " '022015',\n",
       " '082016',\n",
       " '052014',\n",
       " '022014',\n",
       " '112015',\n",
       " '062015',\n",
       " '012014',\n",
       " '122018',\n",
       " '092015',\n",
       " '042017',\n",
       " '042015',\n",
       " '042017',\n",
       " '052017',\n",
       " '122017',\n",
       " '042019',\n",
       " '112014',\n",
       " '082016',\n",
       " '122015',\n",
       " '042016',\n",
       " '052016',\n",
       " '062015',\n",
       " '082014',\n",
       " '022016',\n",
       " '062017',\n",
       " '022018',\n",
       " '112016',\n",
       " '122015',\n",
       " '072014',\n",
       " '042017',\n",
       " '112015',\n",
       " '122016',\n",
       " '012016',\n",
       " '082015',\n",
       " '022016',\n",
       " '012018',\n",
       " '032015',\n",
       " '122014',\n",
       " '112014',\n",
       " '012016',\n",
       " '102015',\n",
       " '022017',\n",
       " '062016',\n",
       " '082018',\n",
       " '032020',\n",
       " '012014',\n",
       " '062014',\n",
       " '052015',\n",
       " '112015',\n",
       " '102018',\n",
       " '072015',\n",
       " '112015',\n",
       " '012017',\n",
       " '012015',\n",
       " '022016',\n",
       " '082015',\n",
       " '062014',\n",
       " '092015',\n",
       " '092015',\n",
       " '062018',\n",
       " '012019',\n",
       " '032016',\n",
       " '022014',\n",
       " '072019',\n",
       " '062016',\n",
       " '032020',\n",
       " '032016',\n",
       " '102015',\n",
       " '082014',\n",
       " '082015',\n",
       " '122015',\n",
       " '032020',\n",
       " '082015',\n",
       " '092015',\n",
       " '102015',\n",
       " '082017',\n",
       " '012016',\n",
       " '122014',\n",
       " '072015',\n",
       " '122014',\n",
       " '022017',\n",
       " '092015',\n",
       " '042019',\n",
       " '062015',\n",
       " '012020',\n",
       " '062015',\n",
       " '032018',\n",
       " '012017',\n",
       " '112018',\n",
       " '102015',\n",
       " '082019',\n",
       " '012018',\n",
       " '072015',\n",
       " '052016',\n",
       " '012017',\n",
       " '082015',\n",
       " '092015',\n",
       " '072019',\n",
       " '052019',\n",
       " '102018',\n",
       " '062016',\n",
       " '112015',\n",
       " '032020',\n",
       " '122015',\n",
       " '032016',\n",
       " '022016',\n",
       " '012016',\n",
       " '032016',\n",
       " '122016',\n",
       " '062015',\n",
       " '022015',\n",
       " '032015',\n",
       " '082015',\n",
       " '072018',\n",
       " '112019',\n",
       " '102019',\n",
       " '042019',\n",
       " '032015',\n",
       " '062017',\n",
       " '022014',\n",
       " '112015',\n",
       " '012020',\n",
       " '112017',\n",
       " '012014',\n",
       " '072014',\n",
       " '042015',\n",
       " '042017',\n",
       " '122014',\n",
       " '092014',\n",
       " '072018',\n",
       " '032020',\n",
       " '092016',\n",
       " '042014',\n",
       " '012015',\n",
       " '122015',\n",
       " '122014',\n",
       " '102016',\n",
       " '082016',\n",
       " '062014',\n",
       " '072018',\n",
       " '082015',\n",
       " '092015',\n",
       " '112015',\n",
       " '062018',\n",
       " '012016',\n",
       " '062016',\n",
       " '102015',\n",
       " '122015',\n",
       " '012015',\n",
       " '022017',\n",
       " '032014',\n",
       " '032017',\n",
       " '112015',\n",
       " '102015',\n",
       " '092016',\n",
       " '032018',\n",
       " '022014',\n",
       " '102015',\n",
       " '082018',\n",
       " '082015',\n",
       " '052019',\n",
       " '042015',\n",
       " '062015',\n",
       " '102018',\n",
       " '072014',\n",
       " '042018',\n",
       " '032015',\n",
       " '012016',\n",
       " '102019',\n",
       " '072019',\n",
       " '102015',\n",
       " '032017',\n",
       " '082015',\n",
       " '122015',\n",
       " '032016',\n",
       " '082015',\n",
       " '042016',\n",
       " '042015',\n",
       " '102016',\n",
       " '012018',\n",
       " '012018',\n",
       " '022018',\n",
       " '012016',\n",
       " '112015',\n",
       " '022017',\n",
       " '052014',\n",
       " '012017',\n",
       " '102015',\n",
       " '092018',\n",
       " '102015',\n",
       " '122015',\n",
       " '062016',\n",
       " '032018',\n",
       " '122014',\n",
       " '102016',\n",
       " '082017',\n",
       " '042015',\n",
       " '042015',\n",
       " '032020',\n",
       " '112015',\n",
       " '102016',\n",
       " '012016',\n",
       " '042015',\n",
       " '102014',\n",
       " '092016',\n",
       " '032016',\n",
       " '032014',\n",
       " '072017',\n",
       " '092016',\n",
       " '032017',\n",
       " '092015',\n",
       " '032019',\n",
       " '032014',\n",
       " '052016',\n",
       " '022016',\n",
       " '032016',\n",
       " '102015',\n",
       " '112015',\n",
       " '062017',\n",
       " '072015',\n",
       " '052016',\n",
       " '082015',\n",
       " '082014',\n",
       " '052014',\n",
       " '092019',\n",
       " '112015',\n",
       " '062018',\n",
       " '122015',\n",
       " '082017',\n",
       " '022018',\n",
       " '022017',\n",
       " '032015',\n",
       " '022015',\n",
       " '082014',\n",
       " '102018',\n",
       " '032015',\n",
       " '032015',\n",
       " '032017',\n",
       " '072019',\n",
       " '042019',\n",
       " '042018',\n",
       " '102015',\n",
       " '122019',\n",
       " '122014',\n",
       " '072016',\n",
       " '052019',\n",
       " '092014',\n",
       " '122016',\n",
       " '012014',\n",
       " '022017',\n",
       " '092017',\n",
       " '112016',\n",
       " '032016',\n",
       " '102015',\n",
       " '072016',\n",
       " '032014',\n",
       " '022016',\n",
       " '102016',\n",
       " '022016',\n",
       " '102016',\n",
       " '032017',\n",
       " '102018',\n",
       " '122016',\n",
       " '072018',\n",
       " '062014',\n",
       " '042016',\n",
       " '082016',\n",
       " '082015',\n",
       " '092019',\n",
       " '072016',\n",
       " '092015',\n",
       " '042017',\n",
       " '022014',\n",
       " '042019',\n",
       " '022015',\n",
       " '052018',\n",
       " '092016',\n",
       " '072015',\n",
       " '022015',\n",
       " '112015',\n",
       " '042017',\n",
       " '122017',\n",
       " '042018',\n",
       " '032017',\n",
       " '112014',\n",
       " '032016',\n",
       " '092015',\n",
       " '042014',\n",
       " '072014',\n",
       " '062018',\n",
       " '012020',\n",
       " '012016',\n",
       " '052019',\n",
       " '102014',\n",
       " '112017',\n",
       " '092015',\n",
       " '072019',\n",
       " '092017',\n",
       " '062015',\n",
       " '022016',\n",
       " '082015',\n",
       " '122017',\n",
       " '072015',\n",
       " '062019',\n",
       " '012019',\n",
       " '092018',\n",
       " '062014',\n",
       " '102015',\n",
       " '032016',\n",
       " '052015',\n",
       " '062018',\n",
       " '072016',\n",
       " '062015',\n",
       " '052015',\n",
       " '012014',\n",
       " '092015',\n",
       " '052016',\n",
       " '022016',\n",
       " '092019',\n",
       " '052014',\n",
       " '102017',\n",
       " '062018',\n",
       " '122016',\n",
       " '102015',\n",
       " '102014',\n",
       " '052015',\n",
       " '092018',\n",
       " '022017',\n",
       " '072018',\n",
       " '022016',\n",
       " '072015',\n",
       " '112018',\n",
       " '082016',\n",
       " '092018',\n",
       " '072017',\n",
       " '032016',\n",
       " '122015',\n",
       " '062015',\n",
       " '052017',\n",
       " '062019',\n",
       " '062016',\n",
       " '062016',\n",
       " '052017',\n",
       " '092017',\n",
       " '032017',\n",
       " '082014',\n",
       " '102019',\n",
       " '022016',\n",
       " '092018',\n",
       " '122014',\n",
       " '122015',\n",
       " '072017',\n",
       " '102017',\n",
       " '122017',\n",
       " '012016',\n",
       " '032017',\n",
       " '012016',\n",
       " '122016',\n",
       " '102014',\n",
       " '102016',\n",
       " '112015',\n",
       " '102015',\n",
       " '092015',\n",
       " '072015',\n",
       " '072016',\n",
       " '022017',\n",
       " '052018',\n",
       " '072018',\n",
       " '122018']"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "linke_rel['date'].values.tolist() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['012014',\n",
       " '012015',\n",
       " '012016',\n",
       " '012017',\n",
       " '012018',\n",
       " '012019',\n",
       " '012020',\n",
       " '022014',\n",
       " '022015',\n",
       " '022016',\n",
       " '022017',\n",
       " '022018',\n",
       " '032014',\n",
       " '032015',\n",
       " '032016',\n",
       " '032017',\n",
       " '032018',\n",
       " '032019',\n",
       " '032020',\n",
       " '042014',\n",
       " '042015',\n",
       " '042016',\n",
       " '042017',\n",
       " '042018',\n",
       " '042019',\n",
       " '052014',\n",
       " '052015',\n",
       " '052016',\n",
       " '052017',\n",
       " '052018',\n",
       " '052019',\n",
       " '062014',\n",
       " '062015',\n",
       " '062016',\n",
       " '062017',\n",
       " '062018',\n",
       " '062019',\n",
       " '072014',\n",
       " '072015',\n",
       " '072016',\n",
       " '072017',\n",
       " '072018',\n",
       " '072019',\n",
       " '082014',\n",
       " '082015',\n",
       " '082016',\n",
       " '082017',\n",
       " '082018',\n",
       " '082019',\n",
       " '092014',\n",
       " '092015',\n",
       " '092016',\n",
       " '092017',\n",
       " '092018',\n",
       " '092019',\n",
       " '102014',\n",
       " '102015',\n",
       " '102016',\n",
       " '102017',\n",
       " '102018',\n",
       " '102019',\n",
       " '112014',\n",
       " '112015',\n",
       " '112016',\n",
       " '112017',\n",
       " '112018',\n",
       " '112019',\n",
       " '122014',\n",
       " '122015',\n",
       " '122016',\n",
       " '122017',\n",
       " '122018',\n",
       " '122019']"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "linke_rel=linke_rel.groupby(['date'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "linke_rel['date'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "linke_rel[\"nlpprocessed\"]=linke_rel['content'].apply(nlp_pipeline)\n",
    "linke_rel_final=linke_rel\n",
    "\n",
    "#convert nlpprocessed column to string\n",
    "linke_rel_final['liststring'] = [','.join(map(str, l)) for l in linke_rel_final['nlpprocessed']]\n",
    "linke_rel_final\n",
    "\n",
    "\n",
    "linke_rel_final['positive words'] = linke_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "linke_rel_final['negative words'] = linke_rel_final['liststring'].str.count('|'.join(negative_list1))\n",
    "\n",
    "\n",
    "conditions = [\n",
    "(linke_rel_final['positive words'] > linke_rel_final['negative words']),\n",
    "(linke_rel_final['negative words'] > linke_rel_final['positive words']),\n",
    "(linke_rel_final['negative words'] == linke_rel_final['positive words'])\n",
    "]\n",
    "\n",
    "choices = [\n",
    "'positive',\n",
    "'negative',\n",
    "'neutral'\n",
    "]\n",
    "\n",
    "linke_rel_final['overall'] = np.select(conditions, choices, default = '')\n",
    "\n",
    "linke_rel_final\n",
    "\n",
    "\n",
    "#clean dataset \n",
    "del linke_rel_final['nlpprocessed']\n",
    "linke_rel_final = linke_rel_final.rename(columns={'content': 'original_title_text', 'liststring': 'text_procssed_text',\"date\":\"month_year\",\"overall\":\"overall_sentiment\"})\n",
    "linke_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/linke_sentiment.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## AfD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Weidel: Christliche Asylbewerber besser schützen!</td>\n",
       "      <td>2017-04-25</td>\n",
       "      <td>Zur grassierenden Gewalt gegen Christen in Asy...</td>\n",
       "      <td>[0.546444]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gauland: Wer keinen Asylanspruch hat, muss abg...</td>\n",
       "      <td>2014-08-12</td>\n",
       "      <td>Berlin, 12. August 2014   Der Völkermord der I...</td>\n",
       "      <td>[0.5103805]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Beatrix von Storch: Wir brauchen dringend die ...</td>\n",
       "      <td>2017-03-07</td>\n",
       "      <td>Berlin, 7. März 2017. Der EuGH hat entschieden...</td>\n",
       "      <td>[0.50531623]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pazderski: Die meisten Muslime sind in Deutsch...</td>\n",
       "      <td>2017-10-02</td>\n",
       "      <td>Berlin, 02. Oktober 2017. Zur aktuellen Studie...</td>\n",
       "      <td>[0.50309138]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Alice Weidel: Präventivmaßnahmen gegen Sexuals...</td>\n",
       "      <td>2017-09-04</td>\n",
       "      <td>Berlin, 4. September 2017. Erneut wurde eine J...</td>\n",
       "      <td>[0.50242066]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title        date  \\\n",
       "0  Weidel: Christliche Asylbewerber besser schützen!  2017-04-25   \n",
       "1  Gauland: Wer keinen Asylanspruch hat, muss abg...  2014-08-12   \n",
       "2  Beatrix von Storch: Wir brauchen dringend die ...  2017-03-07   \n",
       "3  Pazderski: Die meisten Muslime sind in Deutsch...  2017-10-02   \n",
       "4  Alice Weidel: Präventivmaßnahmen gegen Sexuals...  2017-09-04   \n",
       "\n",
       "                                             content         score  \\\n",
       "0  Zur grassierenden Gewalt gegen Christen in Asy...    [0.546444]   \n",
       "1  Berlin, 12. August 2014   Der Völkermord der I...   [0.5103805]   \n",
       "2  Berlin, 7. März 2017. Der EuGH hat entschieden...  [0.50531623]   \n",
       "3  Berlin, 02. Oktober 2017. Zur aktuellen Studie...  [0.50309138]   \n",
       "4  Berlin, 4. September 2017. Erneut wurde eine J...  [0.50242066]   \n",
       "\n",
       "     how_included  n_words  \n",
       "0  OVER-THRESHOLD      192  \n",
       "1  OVER-THRESHOLD      183  \n",
       "2  OVER-THRESHOLD      188  \n",
       "3  OVER-THRESHOLD      249  \n",
       "4  OVER-THRESHOLD      198  "
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#here we are using the file that only contains refugee relevant content\n",
    "afd_rel = pd.read_csv(\"/Users/ashrakatelshehawy/afd_refugeerelevant.csv\", encoding='utf-8', delimiter='\\t',header=None)\n",
    "afd_rel.columns = ['title', 'date',\"content\",\"score\",\"how_included\",\"n_words\"]\n",
    "\n",
    "afd_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>113</th>\n",
       "      <td>Alexander Gauland: Das BAMF hat keine Kontroll...</td>\n",
       "      <td>2017-05-05</td>\n",
       "      <td>Berlin, 5. Mai 2017. Zum Skandal um Franco A. ...</td>\n",
       "      <td>[0.42517914]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>Georg Pazderski: Biologische Altersfeststellun...</td>\n",
       "      <td>2018-01-09</td>\n",
       "      <td>Berlin, 9. Januar 2018. Einer repräsentativen ...</td>\n",
       "      <td>[0.46594369]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>171</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Alice Weidel: Präventivmaßnahmen gegen Sexuals...</td>\n",
       "      <td>2017-09-04</td>\n",
       "      <td>Berlin, 4. September 2017. Erneut wurde eine J...</td>\n",
       "      <td>[0.50242066]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>Georg Pazderski: Malu Dreyer will unseren Rech...</td>\n",
       "      <td>2017-10-19</td>\n",
       "      <td>Berlin, 19. Oktober 2017. Zum Vorschlag von Ma...</td>\n",
       "      <td>[0.46317506]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>141</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>281</th>\n",
       "      <td>Petry: Europol und Polizeigewerkschaft strafen...</td>\n",
       "      <td>2016-01-26</td>\n",
       "      <td>Berlin, 26. Januar 2016. „Der Versuch der Kons...</td>\n",
       "      <td>[0.37793562]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>252</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>Meuthen/Beck: Gegen globale Einwanderung in di...</td>\n",
       "      <td>2018-11-09</td>\n",
       "      <td>Berlin, 9. November 2018. Zur Debatte um den „...</td>\n",
       "      <td>[0.43945099]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>425</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>236</th>\n",
       "      <td>Von Storch: Mit Vertretern wie Bischof Dröge, ...</td>\n",
       "      <td>2016-10-28</td>\n",
       "      <td>Berlin, 28. Oktober 2016. Zu den Aussagen des ...</td>\n",
       "      <td>[0.38828737]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>272</th>\n",
       "      <td>Alexander Gauland: Die grünen Multikulti-Träum...</td>\n",
       "      <td>2018-11-13</td>\n",
       "      <td>Berlin, 13. November 2018. Nachdem der GRÜNEN-...</td>\n",
       "      <td>[0.38014181]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>377</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>293</th>\n",
       "      <td>Gauland: Wir Deutschen sitzen in der Falle!</td>\n",
       "      <td>2016-01-05</td>\n",
       "      <td>Berlin, 5. Januar 2016. Der Flüchtlingsstrom n...</td>\n",
       "      <td>[0.37537733]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>161</th>\n",
       "      <td>Driesang: Christenverfolgung in Deutschland</td>\n",
       "      <td>2016-11-24</td>\n",
       "      <td>Berlin, 24. November 2016. Zur Verfolgung chri...</td>\n",
       "      <td>[0.40843683]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>318</th>\n",
       "      <td>Henkel: Die AfD wirkt! Die SPD übernimmt Einwa...</td>\n",
       "      <td>2015-03-04</td>\n",
       "      <td>Zum Vorhaben der SPD, ein Einwanderungsgesetz ...</td>\n",
       "      <td>[0.37483843]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>129</th>\n",
       "      <td>Pazderski: Ethnisches Profiling ist unerlässlich</td>\n",
       "      <td>2017-01-06</td>\n",
       "      <td>Berlin, 6. Januar 2017. Zur Debatte um die Kri...</td>\n",
       "      <td>[0.41893734]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>309</th>\n",
       "      <td>Henkel: Die AfD wirkt! Die SPD übernimmt Einwa...</td>\n",
       "      <td>2015-03-04</td>\n",
       "      <td>Zum Vorhaben der SPD, ein Einwanderungsgesetz ...</td>\n",
       "      <td>[0.37483843]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>439</th>\n",
       "      <td>Weidel: Die Menschen lassen sich nicht mehr fü...</td>\n",
       "      <td>2016-11-14</td>\n",
       "      <td>Berlin, 14. November 2016. Zu den Äußerungen v...</td>\n",
       "      <td>[0.34527347]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>195</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>255</th>\n",
       "      <td>Pazderski: Kiesewetter – Ideologie vor Verstand</td>\n",
       "      <td>2016-01-22</td>\n",
       "      <td>Berlin, 22. Januar 2016. Im Focus online Inter...</td>\n",
       "      <td>[0.383668]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>Alice Weidel: Sozialleistungen für EU-weit dop...</td>\n",
       "      <td>2018-12-17</td>\n",
       "      <td>Berlin, 17. Dezember 2018. Die Forderung der M...</td>\n",
       "      <td>[0.42145842]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>70</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>174</th>\n",
       "      <td>Georg Pazderski: Finanzielle Unterstützung für...</td>\n",
       "      <td>2018-09-05</td>\n",
       "      <td>Berlin, 5. September 2018. Der stellvertretend...</td>\n",
       "      <td>[0.40557093]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>370</th>\n",
       "      <td>Weidel: Alterssicherungsbericht der Bundesregi...</td>\n",
       "      <td>2016-10-26</td>\n",
       "      <td>Berlin, 26. Oktober 2016. Zum „Alterssicherung...</td>\n",
       "      <td>[0.36398013]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>271</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>411</th>\n",
       "      <td>Petry: Asselborn steht vor dem EU-Scherbenhaufen</td>\n",
       "      <td>2015-11-10</td>\n",
       "      <td>Berlin, 10. November 2015. Zu den aktuellen Äu...</td>\n",
       "      <td>[0.35212422]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>380</th>\n",
       "      <td>Alice Weidel: Erlass für Salzgitter zeigt, das...</td>\n",
       "      <td>2017-10-12</td>\n",
       "      <td>Berlin, 12. Oktober 2017. Niedersachsen verbie...</td>\n",
       "      <td>[0.36105958]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>251</th>\n",
       "      <td>Georg Pazderski: EU-Mission ermöglicht den Sch...</td>\n",
       "      <td>2017-07-26</td>\n",
       "      <td>Berlin, 26. Juli 2017. Zur Verlängerung der EU...</td>\n",
       "      <td>[0.38420373]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>171</th>\n",
       "      <td>Georg Pazderski: Innenminister gehen fahrlässi...</td>\n",
       "      <td>2019-02-05</td>\n",
       "      <td>Berlin, 5. Februar 2019. Der stellvertretende ...</td>\n",
       "      <td>[0.40598127]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>292</th>\n",
       "      <td>Gauland: Merkel verordnet Kontrollverlust</td>\n",
       "      <td>2016-06-14</td>\n",
       "      <td>Berlin, 14. Juni 2016. Zur Asylkrise erklärt d...</td>\n",
       "      <td>[0.37561781]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>Andreas Kalbitz: Islamisten sind für unsere Ge...</td>\n",
       "      <td>2019-01-16</td>\n",
       "      <td>Berlin, 16. Januar 2019. Radikale Islamisten u...</td>\n",
       "      <td>[0.45139669]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>274</th>\n",
       "      <td>Gauland: Ein trauriger Rekord</td>\n",
       "      <td>2015-12-09</td>\n",
       "      <td>Berlin, 9. Dezember 2015. Zur Registrierung de...</td>\n",
       "      <td>[0.37970223]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>205</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 title        date  \\\n",
       "113  Alexander Gauland: Das BAMF hat keine Kontroll...  2017-05-05   \n",
       "30   Georg Pazderski: Biologische Altersfeststellun...  2018-01-09   \n",
       "4    Alice Weidel: Präventivmaßnahmen gegen Sexuals...  2017-09-04   \n",
       "37   Georg Pazderski: Malu Dreyer will unseren Rech...  2017-10-19   \n",
       "281  Petry: Europol und Polizeigewerkschaft strafen...  2016-01-26   \n",
       "75   Meuthen/Beck: Gegen globale Einwanderung in di...  2018-11-09   \n",
       "236  Von Storch: Mit Vertretern wie Bischof Dröge, ...  2016-10-28   \n",
       "272  Alexander Gauland: Die grünen Multikulti-Träum...  2018-11-13   \n",
       "293        Gauland: Wir Deutschen sitzen in der Falle!  2016-01-05   \n",
       "161        Driesang: Christenverfolgung in Deutschland  2016-11-24   \n",
       "318  Henkel: Die AfD wirkt! Die SPD übernimmt Einwa...  2015-03-04   \n",
       "129   Pazderski: Ethnisches Profiling ist unerlässlich  2017-01-06   \n",
       "309  Henkel: Die AfD wirkt! Die SPD übernimmt Einwa...  2015-03-04   \n",
       "439  Weidel: Die Menschen lassen sich nicht mehr fü...  2016-11-14   \n",
       "255    Pazderski: Kiesewetter – Ideologie vor Verstand  2016-01-22   \n",
       "123  Alice Weidel: Sozialleistungen für EU-weit dop...  2018-12-17   \n",
       "174  Georg Pazderski: Finanzielle Unterstützung für...  2018-09-05   \n",
       "370  Weidel: Alterssicherungsbericht der Bundesregi...  2016-10-26   \n",
       "411   Petry: Asselborn steht vor dem EU-Scherbenhaufen  2015-11-10   \n",
       "380  Alice Weidel: Erlass für Salzgitter zeigt, das...  2017-10-12   \n",
       "251  Georg Pazderski: EU-Mission ermöglicht den Sch...  2017-07-26   \n",
       "171  Georg Pazderski: Innenminister gehen fahrlässi...  2019-02-05   \n",
       "292          Gauland: Merkel verordnet Kontrollverlust  2016-06-14   \n",
       "57   Andreas Kalbitz: Islamisten sind für unsere Ge...  2019-01-16   \n",
       "274                      Gauland: Ein trauriger Rekord  2015-12-09   \n",
       "\n",
       "                                               content         score  \\\n",
       "113  Berlin, 5. Mai 2017. Zum Skandal um Franco A. ...  [0.42517914]   \n",
       "30   Berlin, 9. Januar 2018. Einer repräsentativen ...  [0.46594369]   \n",
       "4    Berlin, 4. September 2017. Erneut wurde eine J...  [0.50242066]   \n",
       "37   Berlin, 19. Oktober 2017. Zum Vorschlag von Ma...  [0.46317506]   \n",
       "281  Berlin, 26. Januar 2016. „Der Versuch der Kons...  [0.37793562]   \n",
       "75   Berlin, 9. November 2018. Zur Debatte um den „...  [0.43945099]   \n",
       "236  Berlin, 28. Oktober 2016. Zu den Aussagen des ...  [0.38828737]   \n",
       "272  Berlin, 13. November 2018. Nachdem der GRÜNEN-...  [0.38014181]   \n",
       "293  Berlin, 5. Januar 2016. Der Flüchtlingsstrom n...  [0.37537733]   \n",
       "161  Berlin, 24. November 2016. Zur Verfolgung chri...  [0.40843683]   \n",
       "318  Zum Vorhaben der SPD, ein Einwanderungsgesetz ...  [0.37483843]   \n",
       "129  Berlin, 6. Januar 2017. Zur Debatte um die Kri...  [0.41893734]   \n",
       "309  Zum Vorhaben der SPD, ein Einwanderungsgesetz ...  [0.37483843]   \n",
       "439  Berlin, 14. November 2016. Zu den Äußerungen v...  [0.34527347]   \n",
       "255  Berlin, 22. Januar 2016. Im Focus online Inter...    [0.383668]   \n",
       "123  Berlin, 17. Dezember 2018. Die Forderung der M...  [0.42145842]   \n",
       "174  Berlin, 5. September 2018. Der stellvertretend...  [0.40557093]   \n",
       "370  Berlin, 26. Oktober 2016. Zum „Alterssicherung...  [0.36398013]   \n",
       "411  Berlin, 10. November 2015. Zu den aktuellen Äu...  [0.35212422]   \n",
       "380  Berlin, 12. Oktober 2017. Niedersachsen verbie...  [0.36105958]   \n",
       "251  Berlin, 26. Juli 2017. Zur Verlängerung der EU...  [0.38420373]   \n",
       "171  Berlin, 5. Februar 2019. Der stellvertretende ...  [0.40598127]   \n",
       "292  Berlin, 14. Juni 2016. Zur Asylkrise erklärt d...  [0.37561781]   \n",
       "57   Berlin, 16. Januar 2019. Radikale Islamisten u...  [0.45139669]   \n",
       "274  Berlin, 9. Dezember 2015. Zur Registrierung de...  [0.37970223]   \n",
       "\n",
       "       how_included  n_words  \n",
       "113  OVER-THRESHOLD      150  \n",
       "30   OVER-THRESHOLD      171  \n",
       "4    OVER-THRESHOLD      198  \n",
       "37   OVER-THRESHOLD      141  \n",
       "281  OVER-THRESHOLD      252  \n",
       "75   OVER-THRESHOLD      425  \n",
       "236  OVER-THRESHOLD      237  \n",
       "272  OVER-THRESHOLD      377  \n",
       "293  OVER-THRESHOLD      166  \n",
       "161  OVER-THRESHOLD      267  \n",
       "318  OVER-THRESHOLD      218  \n",
       "129  OVER-THRESHOLD      228  \n",
       "309  OVER-THRESHOLD      218  \n",
       "439  OVER-THRESHOLD      195  \n",
       "255  OVER-THRESHOLD      278  \n",
       "123  OVER-THRESHOLD       70  \n",
       "174  OVER-THRESHOLD      154  \n",
       "370  OVER-THRESHOLD      271  \n",
       "411  OVER-THRESHOLD      323  \n",
       "380  OVER-THRESHOLD      192  \n",
       "251  OVER-THRESHOLD      142  \n",
       "171  OVER-THRESHOLD      264  \n",
       "292  OVER-THRESHOLD      182  \n",
       "57   OVER-THRESHOLD      176  \n",
       "274  OVER-THRESHOLD      205  "
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "afd_rel_sample=afd_rel.sample(25)\n",
    "afd_rel_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "#give column names\n",
    "afd_rel.columns = ['title','date', 'content',\"similarity score\",\"how_included\",\"n_words\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['2017-04-25',\n",
       " '2014-08-12',\n",
       " '2017-03-07',\n",
       " '2017-10-02',\n",
       " '2017-09-04',\n",
       " '2015-09-25',\n",
       " '2019-02-04',\n",
       " '2017-06-08',\n",
       " '2017-08-22',\n",
       " '2015-10-19',\n",
       " '2017-05-05',\n",
       " '2016-07-27',\n",
       " '2017-12-08',\n",
       " '2018-11-26',\n",
       " '2016-06-29',\n",
       " '2017-06-13',\n",
       " '2015-08-24',\n",
       " '2017-08-29',\n",
       " '2017-10-13',\n",
       " '2018-07-05',\n",
       " '2018-02-05',\n",
       " '2016-10-10',\n",
       " '2018-08-15',\n",
       " '2017-06-14',\n",
       " '2017-06-06',\n",
       " '2016-01-05',\n",
       " '2017-09-20',\n",
       " '2015-01-04',\n",
       " '2015-01-04',\n",
       " '2019-02-26',\n",
       " '2018-01-09',\n",
       " '2018-03-20',\n",
       " '2019-08-12',\n",
       " '2016-01-25',\n",
       " '2016-01-25',\n",
       " '2014-09-05',\n",
       " '2019-08-19',\n",
       " '2017-10-19',\n",
       " '2017-02-07',\n",
       " '2016-08-30',\n",
       " '2018-11-15',\n",
       " '2019-04-08',\n",
       " '2019-04-10',\n",
       " '2017-07-17',\n",
       " '2019-09-14',\n",
       " '2019-06-24',\n",
       " '2017-03-03',\n",
       " '2016-01-08',\n",
       " '2016-01-08',\n",
       " '2018-12-17',\n",
       " '2019-01-04',\n",
       " '2018-01-17',\n",
       " '2017-08-16',\n",
       " '2017-01-03',\n",
       " '2015-09-10',\n",
       " '2016-11-18',\n",
       " '2016-07-21',\n",
       " '2019-01-16',\n",
       " '2017-09-21',\n",
       " '2017-06-06',\n",
       " '2015-12-02',\n",
       " '2014-08-27',\n",
       " '2016-03-07',\n",
       " '2016-03-08',\n",
       " '2016-01-28',\n",
       " '2016-01-28',\n",
       " '2019-07-23',\n",
       " '2016-11-15',\n",
       " '2016-03-01',\n",
       " '2015-09-11',\n",
       " '2018-03-27',\n",
       " '2018-12-14',\n",
       " '2017-05-18',\n",
       " '2019-05-17',\n",
       " '2016-05-19',\n",
       " '2018-11-09',\n",
       " '2015-11-30',\n",
       " '2017-08-13',\n",
       " '2013-10-11',\n",
       " '2018-07-26',\n",
       " '2017-09-22',\n",
       " '2016-03-26',\n",
       " '2016-06-04',\n",
       " '2018-05-31',\n",
       " '2018-04-17',\n",
       " '2017-01-06',\n",
       " '2015-11-05',\n",
       " '2017-07-26',\n",
       " '2019-07-01',\n",
       " '2019-09-22',\n",
       " '2017-02-08',\n",
       " '2017-06-15',\n",
       " '2018-01-04',\n",
       " '2015-03-24',\n",
       " '2015-08-25',\n",
       " '2015-08-25',\n",
       " '2015-08-25',\n",
       " '2015-08-25',\n",
       " '2019-04-17',\n",
       " '2017-08-30',\n",
       " '2015-08-27',\n",
       " '2019-03-11',\n",
       " '2014-08-26',\n",
       " '2015-09-07',\n",
       " '2019-02-26',\n",
       " '2014-09-03',\n",
       " '2019-03-25',\n",
       " '2019-09-06',\n",
       " '2018-03-23',\n",
       " '2017-03-07',\n",
       " '2017-08-17',\n",
       " '2016-11-16',\n",
       " '2016-05-30',\n",
       " '2017-05-05',\n",
       " '2019-02-08',\n",
       " '2017-04-25',\n",
       " '2017-01-06',\n",
       " '2019-09-22',\n",
       " '2016-07-13',\n",
       " '2017-08-29',\n",
       " '2017-01-22',\n",
       " '2018-04-18',\n",
       " '2017-02-20',\n",
       " '2018-12-17',\n",
       " '2015-04-21',\n",
       " '2018-11-15',\n",
       " '2015-08-26',\n",
       " '2015-08-26',\n",
       " '2017-03-08',\n",
       " '2017-01-06',\n",
       " '2016-04-06',\n",
       " '2017-04-25',\n",
       " '2018-06-13',\n",
       " '2017-12-04',\n",
       " '2017-01-02',\n",
       " '2019-03-11',\n",
       " '2017-03-09',\n",
       " '2016-01-29',\n",
       " '2016-10-18',\n",
       " '2017-06-28',\n",
       " '2017-07-18',\n",
       " '2018-11-26',\n",
       " '2019-04-16',\n",
       " '2016-09-22',\n",
       " '2018-02-01',\n",
       " '2019-07-10',\n",
       " '2017-10-11',\n",
       " '2017-01-09',\n",
       " '2017-06-30',\n",
       " '2017-11-14',\n",
       " '2018-07-06',\n",
       " '2016-09-14',\n",
       " '2018-11-20',\n",
       " '2016-03-11',\n",
       " '2016-01-25',\n",
       " '2016-01-25',\n",
       " '2017-07-04',\n",
       " '2018-11-27',\n",
       " '2015-12-29',\n",
       " '2019-03-26',\n",
       " '2017-07-21',\n",
       " '2016-11-24',\n",
       " '2017-02-15',\n",
       " '2015-01-13',\n",
       " '2015-01-13',\n",
       " '2015-09-28',\n",
       " '2014-09-22',\n",
       " '2016-10-11',\n",
       " '2019-01-30',\n",
       " '2019-03-22',\n",
       " '2017-07-25',\n",
       " '2019-02-05',\n",
       " '2016-07-04',\n",
       " '2019-02-27',\n",
       " '2018-09-05',\n",
       " '2017-04-25',\n",
       " '2017-08-07',\n",
       " '2015-09-24',\n",
       " '2014-07-31',\n",
       " '2016-04-04',\n",
       " '2017-05-04',\n",
       " '2016-10-13',\n",
       " '2019-07-19',\n",
       " '2017-02-14',\n",
       " '2017-02-20',\n",
       " '2018-02-22',\n",
       " '2016-07-25',\n",
       " '2016-04-14',\n",
       " '2017-11-30',\n",
       " '2016-03-16',\n",
       " '2017-09-06',\n",
       " '2015-10-15',\n",
       " '2018-04-13',\n",
       " '2016-03-10',\n",
       " '2017-06-20',\n",
       " '2016-07-26',\n",
       " '2016-10-17',\n",
       " '2015-07-23',\n",
       " '2015-07-23',\n",
       " '2019-07-23',\n",
       " '2016-10-14',\n",
       " '2016-10-07',\n",
       " '2018-06-07',\n",
       " '2017-04-05',\n",
       " '2018-08-20',\n",
       " '2015-12-31',\n",
       " '2015-12-31',\n",
       " '2019-09-04',\n",
       " '2014-12-15',\n",
       " '2017-01-19',\n",
       " '2017-04-25',\n",
       " '2017-04-25',\n",
       " '2017-07-27',\n",
       " '2017-10-23',\n",
       " '2018-06-27',\n",
       " '2015-11-12',\n",
       " '2015-07-27',\n",
       " '2017-09-08',\n",
       " '2017-12-19',\n",
       " '2015-10-05',\n",
       " '2018-04-20',\n",
       " '2015-10-01',\n",
       " '2016-04-27',\n",
       " '2017-10-09',\n",
       " '2016-03-03',\n",
       " '2016-06-09',\n",
       " '2018-10-18',\n",
       " '2016-11-25',\n",
       " '2019-05-29',\n",
       " '2017-07-05',\n",
       " '2015-09-09',\n",
       " '2018-06-26',\n",
       " '2017-12-01',\n",
       " '2015-10-22',\n",
       " '2015-12-07',\n",
       " '2016-07-07',\n",
       " '2016-10-28',\n",
       " '2014-09-24',\n",
       " '2017-09-19',\n",
       " '2017-03-29',\n",
       " '2015-12-14',\n",
       " '2019-07-26',\n",
       " '2015-10-26',\n",
       " '2017-04-25',\n",
       " '2016-09-12',\n",
       " '2016-02-05',\n",
       " '2016-02-18',\n",
       " '2016-01-20',\n",
       " '2016-01-20',\n",
       " '2019-03-11',\n",
       " '2015-08-13',\n",
       " '2017-07-26',\n",
       " '2017-09-20',\n",
       " '2016-08-05',\n",
       " '2016-01-22',\n",
       " '2016-01-22',\n",
       " '2017-03-14',\n",
       " '2017-02-06',\n",
       " '2016-10-17',\n",
       " '2015-08-11',\n",
       " '2016-07-18',\n",
       " '2018-04-13',\n",
       " '2019-09-23',\n",
       " '2017-04-27',\n",
       " '2018-10-25',\n",
       " '2017-08-29',\n",
       " '2017-12-23',\n",
       " '2018-11-23',\n",
       " '2015-11-25',\n",
       " '2019-07-05',\n",
       " '2015-10-01',\n",
       " '2019-02-05',\n",
       " '2018-11-13',\n",
       " '2017-06-16',\n",
       " '2015-12-09',\n",
       " '2016-01-06',\n",
       " '2016-01-06',\n",
       " '2014-04-23',\n",
       " '2017-04-12',\n",
       " '2017-09-14',\n",
       " '2017-03-01',\n",
       " '2016-01-26',\n",
       " '2017-09-20',\n",
       " '2016-11-16',\n",
       " '2016-02-11',\n",
       " '2015-12-22',\n",
       " '2015-09-13',\n",
       " '2016-06-29',\n",
       " '2016-06-28',\n",
       " '2018-01-19',\n",
       " '2017-04-25',\n",
       " '2017-07-24',\n",
       " '2016-06-14',\n",
       " '2016-01-05',\n",
       " '2016-01-05',\n",
       " '2016-05-24',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2015-03-04',\n",
       " '2018-12-03',\n",
       " '2019-07-29',\n",
       " '2017-08-30',\n",
       " '2017-06-28',\n",
       " '2015-06-20',\n",
       " '2018-06-27',\n",
       " '2018-09-01',\n",
       " '2017-09-14',\n",
       " '2016-07-13',\n",
       " '2017-02-03',\n",
       " '2019-04-16',\n",
       " '2015-08-29',\n",
       " '2017-12-20',\n",
       " '2017-01-12',\n",
       " '2019-07-24',\n",
       " '2015-12-04',\n",
       " '2017-09-07',\n",
       " '2019-03-21',\n",
       " '2017-10-06',\n",
       " '2017-09-07',\n",
       " '2016-10-05',\n",
       " '2017-09-12',\n",
       " '2017-02-09',\n",
       " '2017-09-11',\n",
       " '2016-09-08',\n",
       " '2017-02-10',\n",
       " '2019-02-18',\n",
       " '2018-06-14',\n",
       " '2016-01-13',\n",
       " '2016-01-13',\n",
       " '2017-04-25',\n",
       " '2018-10-31',\n",
       " '2017-03-03',\n",
       " '2015-10-05',\n",
       " '2017-09-01',\n",
       " '2015-11-20',\n",
       " '2016-08-12',\n",
       " '2019-06-18',\n",
       " '2016-10-26',\n",
       " '2018-03-13',\n",
       " '2015-11-02',\n",
       " '2017-11-02',\n",
       " '2018-09-06',\n",
       " '2017-05-19',\n",
       " '2015-11-18',\n",
       " '2016-03-03',\n",
       " '2017-07-21',\n",
       " '2016-10-27',\n",
       " '2017-10-12',\n",
       " '2016-09-26',\n",
       " '2016-11-08',\n",
       " '2019-05-20',\n",
       " '2017-06-28',\n",
       " '2016-03-03',\n",
       " '2017-02-01',\n",
       " '2017-07-24',\n",
       " '2015-08-20',\n",
       " '2014-08-11',\n",
       " '2016-09-13',\n",
       " '2015-09-25',\n",
       " '2015-09-25',\n",
       " '2014-12-16',\n",
       " '2015-09-08',\n",
       " '2017-09-19',\n",
       " '2015-10-26',\n",
       " '2018-11-20',\n",
       " '2018-05-27',\n",
       " '2016-05-11',\n",
       " '2014-12-15',\n",
       " '2015-02-20',\n",
       " '2017-09-08',\n",
       " '2018-06-20',\n",
       " '2017-12-11',\n",
       " '2018-07-24',\n",
       " '2019-02-15',\n",
       " '2015-09-25',\n",
       " '2016-05-12',\n",
       " '2017-06-29',\n",
       " '2017-02-28',\n",
       " '2015-11-10',\n",
       " '2017-08-15',\n",
       " '2016-06-02',\n",
       " '2019-04-15',\n",
       " '2019-01-30',\n",
       " '2016-02-19',\n",
       " '2019-01-20',\n",
       " '2018-04-09',\n",
       " '2019-11-01',\n",
       " '2016-06-09',\n",
       " '2016-08-18',\n",
       " '2019-07-01',\n",
       " '2018-09-10',\n",
       " '2017-09-12',\n",
       " '2016-02-11',\n",
       " '2015-12-01',\n",
       " '2015-12-01',\n",
       " '2015-09-11',\n",
       " '2016-09-09',\n",
       " '2019-09-19',\n",
       " '2015-11-03',\n",
       " '2015-01-20',\n",
       " '2015-10-12',\n",
       " '2017-01-24',\n",
       " '2019-10-18',\n",
       " '2016-05-04',\n",
       " '2016-12-02',\n",
       " '2018-05-30',\n",
       " '2016-11-14',\n",
       " '2019-05-22',\n",
       " '2017-04-25',\n",
       " '2017-11-29',\n",
       " '2017-03-13',\n",
       " '2015-10-20',\n",
       " '2018-01-30',\n",
       " '2017-08-18',\n",
       " '2015-10-28',\n",
       " '2016-02-04',\n",
       " '2016-11-07',\n",
       " '2015-10-19',\n",
       " '2017-08-03',\n",
       " '2019-09-17',\n",
       " '2017-01-22',\n",
       " '2016-09-23',\n",
       " '2019-07-23',\n",
       " '2016-12-16',\n",
       " '2019-08-06',\n",
       " '2016-01-20',\n",
       " '2018-07-05',\n",
       " '2016-08-17']"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "afd_rel['date'].values.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>similarity score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Empty DataFrame\n",
       "Columns: [title, date, content, similarity score, how_included, n_words]\n",
       "Index: []"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "afd_rel_nan = afd_rel[afd_rel.isna().any(axis=1)]\n",
    "afd_rel_nan\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "afd_rel[['year','month',\"day\"]] = afd_rel['date'].str.split('-',expand=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>similarity score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Weidel: Christliche Asylbewerber besser schützen!</td>\n",
       "      <td>2017-04-25</td>\n",
       "      <td>Zur grassierenden Gewalt gegen Christen in Asy...</td>\n",
       "      <td>[0.546444]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>192</td>\n",
       "      <td>2017</td>\n",
       "      <td>04</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gauland: Wer keinen Asylanspruch hat, muss abg...</td>\n",
       "      <td>2014-08-12</td>\n",
       "      <td>Berlin, 12. August 2014   Der Völkermord der I...</td>\n",
       "      <td>[0.5103805]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>183</td>\n",
       "      <td>2014</td>\n",
       "      <td>08</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Beatrix von Storch: Wir brauchen dringend die ...</td>\n",
       "      <td>2017-03-07</td>\n",
       "      <td>Berlin, 7. März 2017. Der EuGH hat entschieden...</td>\n",
       "      <td>[0.50531623]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>188</td>\n",
       "      <td>2017</td>\n",
       "      <td>03</td>\n",
       "      <td>07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pazderski: Die meisten Muslime sind in Deutsch...</td>\n",
       "      <td>2017-10-02</td>\n",
       "      <td>Berlin, 02. Oktober 2017. Zur aktuellen Studie...</td>\n",
       "      <td>[0.50309138]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>249</td>\n",
       "      <td>2017</td>\n",
       "      <td>10</td>\n",
       "      <td>02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Alice Weidel: Präventivmaßnahmen gegen Sexuals...</td>\n",
       "      <td>2017-09-04</td>\n",
       "      <td>Berlin, 4. September 2017. Erneut wurde eine J...</td>\n",
       "      <td>[0.50242066]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>198</td>\n",
       "      <td>2017</td>\n",
       "      <td>09</td>\n",
       "      <td>04</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title        date  \\\n",
       "0  Weidel: Christliche Asylbewerber besser schützen!  2017-04-25   \n",
       "1  Gauland: Wer keinen Asylanspruch hat, muss abg...  2014-08-12   \n",
       "2  Beatrix von Storch: Wir brauchen dringend die ...  2017-03-07   \n",
       "3  Pazderski: Die meisten Muslime sind in Deutsch...  2017-10-02   \n",
       "4  Alice Weidel: Präventivmaßnahmen gegen Sexuals...  2017-09-04   \n",
       "\n",
       "                                             content similarity score  \\\n",
       "0  Zur grassierenden Gewalt gegen Christen in Asy...       [0.546444]   \n",
       "1  Berlin, 12. August 2014   Der Völkermord der I...      [0.5103805]   \n",
       "2  Berlin, 7. März 2017. Der EuGH hat entschieden...     [0.50531623]   \n",
       "3  Berlin, 02. Oktober 2017. Zur aktuellen Studie...     [0.50309138]   \n",
       "4  Berlin, 4. September 2017. Erneut wurde eine J...     [0.50242066]   \n",
       "\n",
       "     how_included  n_words  year month day  \n",
       "0  OVER-THRESHOLD      192  2017    04  25  \n",
       "1  OVER-THRESHOLD      183  2014    08  12  \n",
       "2  OVER-THRESHOLD      188  2017    03  07  \n",
       "3  OVER-THRESHOLD      249  2017    10  02  \n",
       "4  OVER-THRESHOLD      198  2017    09  04  "
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "afd_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>similarity score</th>\n",
       "      <th>how_included</th>\n",
       "      <th>n_words</th>\n",
       "      <th>year</th>\n",
       "      <th>month</th>\n",
       "      <th>day</th>\n",
       "      <th>my</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Weidel: Christliche Asylbewerber besser schützen!</td>\n",
       "      <td>2017-04-25</td>\n",
       "      <td>Weidel: Christliche Asylbewerber besser schütz...</td>\n",
       "      <td>[0.546444]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>192</td>\n",
       "      <td>2017</td>\n",
       "      <td>04</td>\n",
       "      <td>25</td>\n",
       "      <td>042017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gauland: Wer keinen Asylanspruch hat, muss abg...</td>\n",
       "      <td>2014-08-12</td>\n",
       "      <td>Gauland: Wer keinen Asylanspruch hat, muss abg...</td>\n",
       "      <td>[0.5103805]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>183</td>\n",
       "      <td>2014</td>\n",
       "      <td>08</td>\n",
       "      <td>12</td>\n",
       "      <td>082014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Beatrix von Storch: Wir brauchen dringend die ...</td>\n",
       "      <td>2017-03-07</td>\n",
       "      <td>Beatrix von Storch: Wir brauchen dringend die ...</td>\n",
       "      <td>[0.50531623]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>188</td>\n",
       "      <td>2017</td>\n",
       "      <td>03</td>\n",
       "      <td>07</td>\n",
       "      <td>032017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pazderski: Die meisten Muslime sind in Deutsch...</td>\n",
       "      <td>2017-10-02</td>\n",
       "      <td>Pazderski: Die meisten Muslime sind in Deutsch...</td>\n",
       "      <td>[0.50309138]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>249</td>\n",
       "      <td>2017</td>\n",
       "      <td>10</td>\n",
       "      <td>02</td>\n",
       "      <td>102017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Alice Weidel: Präventivmaßnahmen gegen Sexuals...</td>\n",
       "      <td>2017-09-04</td>\n",
       "      <td>Alice Weidel: Präventivmaßnahmen gegen Sexuals...</td>\n",
       "      <td>[0.50242066]</td>\n",
       "      <td>OVER-THRESHOLD</td>\n",
       "      <td>198</td>\n",
       "      <td>2017</td>\n",
       "      <td>09</td>\n",
       "      <td>04</td>\n",
       "      <td>092017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                               title        date  \\\n",
       "0  Weidel: Christliche Asylbewerber besser schützen!  2017-04-25   \n",
       "1  Gauland: Wer keinen Asylanspruch hat, muss abg...  2014-08-12   \n",
       "2  Beatrix von Storch: Wir brauchen dringend die ...  2017-03-07   \n",
       "3  Pazderski: Die meisten Muslime sind in Deutsch...  2017-10-02   \n",
       "4  Alice Weidel: Präventivmaßnahmen gegen Sexuals...  2017-09-04   \n",
       "\n",
       "                                             content similarity score  \\\n",
       "0  Weidel: Christliche Asylbewerber besser schütz...       [0.546444]   \n",
       "1  Gauland: Wer keinen Asylanspruch hat, muss abg...      [0.5103805]   \n",
       "2  Beatrix von Storch: Wir brauchen dringend die ...     [0.50531623]   \n",
       "3  Pazderski: Die meisten Muslime sind in Deutsch...     [0.50309138]   \n",
       "4  Alice Weidel: Präventivmaßnahmen gegen Sexuals...     [0.50242066]   \n",
       "\n",
       "     how_included  n_words  year month day      my  \n",
       "0  OVER-THRESHOLD      192  2017    04  25  042017  \n",
       "1  OVER-THRESHOLD      183  2014    08  12  082014  \n",
       "2  OVER-THRESHOLD      188  2017    03  07  032017  \n",
       "3  OVER-THRESHOLD      249  2017    10  02  102017  \n",
       "4  OVER-THRESHOLD      198  2017    09  04  092017  "
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "afd_rel[\"my\"] = afd_rel[\"month\"] + afd_rel[\"year\"] \n",
    "afd_rel[\"content\"] = afd_rel[\"title\"] +[\" \"]+ afd_rel[\"content\"] \n",
    "\n",
    "afd_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>content</th>\n",
       "      <th>my</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Weidel: Christliche Asylbewerber besser schütz...</td>\n",
       "      <td>042017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Gauland: Wer keinen Asylanspruch hat, muss abg...</td>\n",
       "      <td>082014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Beatrix von Storch: Wir brauchen dringend die ...</td>\n",
       "      <td>032017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Pazderski: Die meisten Muslime sind in Deutsch...</td>\n",
       "      <td>102017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Alice Weidel: Präventivmaßnahmen gegen Sexuals...</td>\n",
       "      <td>092017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                             content      my\n",
       "0  Weidel: Christliche Asylbewerber besser schütz...  042017\n",
       "1  Gauland: Wer keinen Asylanspruch hat, muss abg...  082014\n",
       "2  Beatrix von Storch: Wir brauchen dringend die ...  032017\n",
       "3  Pazderski: Die meisten Muslime sind in Deutsch...  102017\n",
       "4  Alice Weidel: Präventivmaßnahmen gegen Sexuals...  092017"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "del afd_rel['date']\n",
    "del afd_rel['title']\n",
    "del afd_rel['similarity score']\n",
    "del afd_rel['how_included']\n",
    "del afd_rel['n_words']\n",
    "del afd_rel['year']\n",
    "del afd_rel['month']\n",
    "del afd_rel['day']\n",
    "afd_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Weidel: Dem Bundessozialgericht liegt nichts a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Petry: Massenhafter Missbrauch von Frauen in K...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Poggenburg: Mindestlohn: Keine Ausnahmeregelun...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Georg Pazderski: Biologische Altersfeststellun...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Alexander Gauland: Die Asylkrise überfordert u...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content\n",
       "0  012015  Weidel: Dem Bundessozialgericht liegt nichts a...\n",
       "1  012016  Petry: Massenhafter Missbrauch von Frauen in K...\n",
       "2  012017  Poggenburg: Mindestlohn: Keine Ausnahmeregelun...\n",
       "3  012018  Georg Pazderski: Biologische Altersfeststellun...\n",
       "4  012019  Alexander Gauland: Die Asylkrise überfordert u..."
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "afd_rel3=afd_rel.groupby(['my'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "afd_rel3.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "64"
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(afd_rel3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Weidel: Dem Bundessozialgericht liegt nichts a...</td>\n",
       "      <td>[weidel, bundessozialgericht, liegt, sozialsta...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Petry: Massenhafter Missbrauch von Frauen in K...</td>\n",
       "      <td>[petry, massenhafter, missbrauch, frauen, köln...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Poggenburg: Mindestlohn: Keine Ausnahmeregelun...</td>\n",
       "      <td>[poggenburg, mindestlohn, ausnahmeregelungen, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Georg Pazderski: Biologische Altersfeststellun...</td>\n",
       "      <td>[georg, pazderski, biologische, altersfeststel...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Alexander Gauland: Die Asylkrise überfordert u...</td>\n",
       "      <td>[alexander, gauland, asylkrise, überfordert, r...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>122014</td>\n",
       "      <td>Henkel: Die AfD verurteilt Brandanschläge auf ...</td>\n",
       "      <td>[henkel, afd, verurteilt, brandanschläge, asyl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>122015</td>\n",
       "      <td>Gauland: Die Slowaken wehren sich gegen Fremdb...</td>\n",
       "      <td>[gauland, slowaken, wehren, fremdbestimmung, b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>122016</td>\n",
       "      <td>Gauland: Migrantenzahlen steigen weiter – Frau...</td>\n",
       "      <td>[gauland, migrantenzahlen, steigen, frau, merk...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>122017</td>\n",
       "      <td>Georg Pazderski: Alter vermeintlich minderjähr...</td>\n",
       "      <td>[georg, pazderski, alter, vermeintlich, minder...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>122018</td>\n",
       "      <td>Alexander Gauland: 1 Million Hartz-IV-Bezieher...</td>\n",
       "      <td>[alexander, gauland, million, migranten, steue...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>64 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content  \\\n",
       "0   012015  Weidel: Dem Bundessozialgericht liegt nichts a...   \n",
       "1   012016  Petry: Massenhafter Missbrauch von Frauen in K...   \n",
       "2   012017  Poggenburg: Mindestlohn: Keine Ausnahmeregelun...   \n",
       "3   012018  Georg Pazderski: Biologische Altersfeststellun...   \n",
       "4   012019  Alexander Gauland: Die Asylkrise überfordert u...   \n",
       "..     ...                                                ...   \n",
       "59  122014  Henkel: Die AfD verurteilt Brandanschläge auf ...   \n",
       "60  122015  Gauland: Die Slowaken wehren sich gegen Fremdb...   \n",
       "61  122016  Gauland: Migrantenzahlen steigen weiter – Frau...   \n",
       "62  122017  Georg Pazderski: Alter vermeintlich minderjähr...   \n",
       "63  122018  Alexander Gauland: 1 Million Hartz-IV-Bezieher...   \n",
       "\n",
       "                                         nlpprocessed  \n",
       "0   [weidel, bundessozialgericht, liegt, sozialsta...  \n",
       "1   [petry, massenhafter, missbrauch, frauen, köln...  \n",
       "2   [poggenburg, mindestlohn, ausnahmeregelungen, ...  \n",
       "3   [georg, pazderski, biologische, altersfeststel...  \n",
       "4   [alexander, gauland, asylkrise, überfordert, r...  \n",
       "..                                                ...  \n",
       "59  [henkel, afd, verurteilt, brandanschläge, asyl...  \n",
       "60  [gauland, slowaken, wehren, fremdbestimmung, b...  \n",
       "61  [gauland, migrantenzahlen, steigen, frau, merk...  \n",
       "62  [georg, pazderski, alter, vermeintlich, minder...  \n",
       "63  [alexander, gauland, million, migranten, steue...  \n",
       "\n",
       "[64 rows x 3 columns]"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#nlp pipeline\n",
    "afd_rel3[\"nlpprocessed\"]=afd_rel3['content'].apply(nlp_pipeline)\n",
    "\n",
    "afd_rel3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Weidel: Dem Bundessozialgericht liegt nichts a...</td>\n",
       "      <td>[weidel, bundessozialgericht, liegt, sozialsta...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Petry: Massenhafter Missbrauch von Frauen in K...</td>\n",
       "      <td>[petry, massenhafter, missbrauch, frauen, köln...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Poggenburg: Mindestlohn: Keine Ausnahmeregelun...</td>\n",
       "      <td>[poggenburg, mindestlohn, ausnahmeregelungen, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Georg Pazderski: Biologische Altersfeststellun...</td>\n",
       "      <td>[georg, pazderski, biologische, altersfeststel...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Alexander Gauland: Die Asylkrise überfordert u...</td>\n",
       "      <td>[alexander, gauland, asylkrise, überfordert, r...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content  \\\n",
       "0  012015  Weidel: Dem Bundessozialgericht liegt nichts a...   \n",
       "1  012016  Petry: Massenhafter Missbrauch von Frauen in K...   \n",
       "2  012017  Poggenburg: Mindestlohn: Keine Ausnahmeregelun...   \n",
       "3  012018  Georg Pazderski: Biologische Altersfeststellun...   \n",
       "4  012019  Alexander Gauland: Die Asylkrise überfordert u...   \n",
       "\n",
       "                                        nlpprocessed  \n",
       "0  [weidel, bundessozialgericht, liegt, sozialsta...  \n",
       "1  [petry, massenhafter, missbrauch, frauen, köln...  \n",
       "2  [poggenburg, mindestlohn, ausnahmeregelungen, ...  \n",
       "3  [georg, pazderski, biologische, altersfeststel...  \n",
       "4  [alexander, gauland, asylkrise, überfordert, r...  "
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "\n",
    "afd_rel_final=afd_rel3\n",
    "\n",
    "afd_rel_final.head()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Weidel: Dem Bundessozialgericht liegt nichts a...</td>\n",
       "      <td>[weidel, bundessozialgericht, liegt, sozialsta...</td>\n",
       "      <td>weidel,bundessozialgericht,liegt,sozialstaat,b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Petry: Massenhafter Missbrauch von Frauen in K...</td>\n",
       "      <td>[petry, massenhafter, missbrauch, frauen, köln...</td>\n",
       "      <td>petry,massenhafter,missbrauch,frauen,köln,erin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Poggenburg: Mindestlohn: Keine Ausnahmeregelun...</td>\n",
       "      <td>[poggenburg, mindestlohn, ausnahmeregelungen, ...</td>\n",
       "      <td>poggenburg,mindestlohn,ausnahmeregelungen,asyl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Georg Pazderski: Biologische Altersfeststellun...</td>\n",
       "      <td>[georg, pazderski, biologische, altersfeststel...</td>\n",
       "      <td>georg,pazderski,biologische,altersfeststellung...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Alexander Gauland: Die Asylkrise überfordert u...</td>\n",
       "      <td>[alexander, gauland, asylkrise, überfordert, r...</td>\n",
       "      <td>alexander,gauland,asylkrise,überfordert,rechts...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>122014</td>\n",
       "      <td>Henkel: Die AfD verurteilt Brandanschläge auf ...</td>\n",
       "      <td>[henkel, afd, verurteilt, brandanschläge, asyl...</td>\n",
       "      <td>henkel,afd,verurteilt,brandanschläge,asylbewer...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>122015</td>\n",
       "      <td>Gauland: Die Slowaken wehren sich gegen Fremdb...</td>\n",
       "      <td>[gauland, slowaken, wehren, fremdbestimmung, b...</td>\n",
       "      <td>gauland,slowaken,wehren,fremdbestimmung,berlin...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>122016</td>\n",
       "      <td>Gauland: Migrantenzahlen steigen weiter – Frau...</td>\n",
       "      <td>[gauland, migrantenzahlen, steigen, frau, merk...</td>\n",
       "      <td>gauland,migrantenzahlen,steigen,frau,merkel,bl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>122017</td>\n",
       "      <td>Georg Pazderski: Alter vermeintlich minderjähr...</td>\n",
       "      <td>[georg, pazderski, alter, vermeintlich, minder...</td>\n",
       "      <td>georg,pazderski,alter,vermeintlich,minderjähri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>122018</td>\n",
       "      <td>Alexander Gauland: 1 Million Hartz-IV-Bezieher...</td>\n",
       "      <td>[alexander, gauland, million, migranten, steue...</td>\n",
       "      <td>alexander,gauland,million,migranten,steuerzahl...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>64 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content  \\\n",
       "0   012015  Weidel: Dem Bundessozialgericht liegt nichts a...   \n",
       "1   012016  Petry: Massenhafter Missbrauch von Frauen in K...   \n",
       "2   012017  Poggenburg: Mindestlohn: Keine Ausnahmeregelun...   \n",
       "3   012018  Georg Pazderski: Biologische Altersfeststellun...   \n",
       "4   012019  Alexander Gauland: Die Asylkrise überfordert u...   \n",
       "..     ...                                                ...   \n",
       "59  122014  Henkel: Die AfD verurteilt Brandanschläge auf ...   \n",
       "60  122015  Gauland: Die Slowaken wehren sich gegen Fremdb...   \n",
       "61  122016  Gauland: Migrantenzahlen steigen weiter – Frau...   \n",
       "62  122017  Georg Pazderski: Alter vermeintlich minderjähr...   \n",
       "63  122018  Alexander Gauland: 1 Million Hartz-IV-Bezieher...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [weidel, bundessozialgericht, liegt, sozialsta...   \n",
       "1   [petry, massenhafter, missbrauch, frauen, köln...   \n",
       "2   [poggenburg, mindestlohn, ausnahmeregelungen, ...   \n",
       "3   [georg, pazderski, biologische, altersfeststel...   \n",
       "4   [alexander, gauland, asylkrise, überfordert, r...   \n",
       "..                                                ...   \n",
       "59  [henkel, afd, verurteilt, brandanschläge, asyl...   \n",
       "60  [gauland, slowaken, wehren, fremdbestimmung, b...   \n",
       "61  [gauland, migrantenzahlen, steigen, frau, merk...   \n",
       "62  [georg, pazderski, alter, vermeintlich, minder...   \n",
       "63  [alexander, gauland, million, migranten, steue...   \n",
       "\n",
       "                                           liststring  \n",
       "0   weidel,bundessozialgericht,liegt,sozialstaat,b...  \n",
       "1   petry,massenhafter,missbrauch,frauen,köln,erin...  \n",
       "2   poggenburg,mindestlohn,ausnahmeregelungen,asyl...  \n",
       "3   georg,pazderski,biologische,altersfeststellung...  \n",
       "4   alexander,gauland,asylkrise,überfordert,rechts...  \n",
       "..                                                ...  \n",
       "59  henkel,afd,verurteilt,brandanschläge,asylbewer...  \n",
       "60  gauland,slowaken,wehren,fremdbestimmung,berlin...  \n",
       "61  gauland,migrantenzahlen,steigen,frau,merkel,bl...  \n",
       "62  georg,pazderski,alter,vermeintlich,minderjähri...  \n",
       "63  alexander,gauland,million,migranten,steuerzahl...  \n",
       "\n",
       "[64 rows x 4 columns]"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#convert nlp processed column to string\n",
    "afd_rel_final['liststring'] = [','.join(map(str, l)) for l in afd_rel_final['nlpprocessed']]\n",
    "afd_rel_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Weidel: Dem Bundessozialgericht liegt nichts a...</td>\n",
       "      <td>[weidel, bundessozialgericht, liegt, sozialsta...</td>\n",
       "      <td>weidel,bundessozialgericht,liegt,sozialstaat,b...</td>\n",
       "      <td>104</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Petry: Massenhafter Missbrauch von Frauen in K...</td>\n",
       "      <td>[petry, massenhafter, missbrauch, frauen, köln...</td>\n",
       "      <td>petry,massenhafter,missbrauch,frauen,köln,erin...</td>\n",
       "      <td>482</td>\n",
       "      <td>364</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Poggenburg: Mindestlohn: Keine Ausnahmeregelun...</td>\n",
       "      <td>[poggenburg, mindestlohn, ausnahmeregelungen, ...</td>\n",
       "      <td>poggenburg,mindestlohn,ausnahmeregelungen,asyl...</td>\n",
       "      <td>263</td>\n",
       "      <td>219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Georg Pazderski: Biologische Altersfeststellun...</td>\n",
       "      <td>[georg, pazderski, biologische, altersfeststel...</td>\n",
       "      <td>georg,pazderski,biologische,altersfeststellung...</td>\n",
       "      <td>106</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012019</td>\n",
       "      <td>Alexander Gauland: Die Asylkrise überfordert u...</td>\n",
       "      <td>[alexander, gauland, asylkrise, überfordert, r...</td>\n",
       "      <td>alexander,gauland,asylkrise,überfordert,rechts...</td>\n",
       "      <td>105</td>\n",
       "      <td>92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>122014</td>\n",
       "      <td>Henkel: Die AfD verurteilt Brandanschläge auf ...</td>\n",
       "      <td>[henkel, afd, verurteilt, brandanschläge, asyl...</td>\n",
       "      <td>henkel,afd,verurteilt,brandanschläge,asylbewer...</td>\n",
       "      <td>79</td>\n",
       "      <td>57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>122015</td>\n",
       "      <td>Gauland: Die Slowaken wehren sich gegen Fremdb...</td>\n",
       "      <td>[gauland, slowaken, wehren, fremdbestimmung, b...</td>\n",
       "      <td>gauland,slowaken,wehren,fremdbestimmung,berlin...</td>\n",
       "      <td>180</td>\n",
       "      <td>161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>122016</td>\n",
       "      <td>Gauland: Migrantenzahlen steigen weiter – Frau...</td>\n",
       "      <td>[gauland, migrantenzahlen, steigen, frau, merk...</td>\n",
       "      <td>gauland,migrantenzahlen,steigen,frau,merkel,bl...</td>\n",
       "      <td>37</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>122017</td>\n",
       "      <td>Georg Pazderski: Alter vermeintlich minderjähr...</td>\n",
       "      <td>[georg, pazderski, alter, vermeintlich, minder...</td>\n",
       "      <td>georg,pazderski,alter,vermeintlich,minderjähri...</td>\n",
       "      <td>147</td>\n",
       "      <td>122</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>122018</td>\n",
       "      <td>Alexander Gauland: 1 Million Hartz-IV-Bezieher...</td>\n",
       "      <td>[alexander, gauland, million, migranten, steue...</td>\n",
       "      <td>alexander,gauland,million,migranten,steuerzahl...</td>\n",
       "      <td>83</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>64 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content  \\\n",
       "0   012015  Weidel: Dem Bundessozialgericht liegt nichts a...   \n",
       "1   012016  Petry: Massenhafter Missbrauch von Frauen in K...   \n",
       "2   012017  Poggenburg: Mindestlohn: Keine Ausnahmeregelun...   \n",
       "3   012018  Georg Pazderski: Biologische Altersfeststellun...   \n",
       "4   012019  Alexander Gauland: Die Asylkrise überfordert u...   \n",
       "..     ...                                                ...   \n",
       "59  122014  Henkel: Die AfD verurteilt Brandanschläge auf ...   \n",
       "60  122015  Gauland: Die Slowaken wehren sich gegen Fremdb...   \n",
       "61  122016  Gauland: Migrantenzahlen steigen weiter – Frau...   \n",
       "62  122017  Georg Pazderski: Alter vermeintlich minderjähr...   \n",
       "63  122018  Alexander Gauland: 1 Million Hartz-IV-Bezieher...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [weidel, bundessozialgericht, liegt, sozialsta...   \n",
       "1   [petry, massenhafter, missbrauch, frauen, köln...   \n",
       "2   [poggenburg, mindestlohn, ausnahmeregelungen, ...   \n",
       "3   [georg, pazderski, biologische, altersfeststel...   \n",
       "4   [alexander, gauland, asylkrise, überfordert, r...   \n",
       "..                                                ...   \n",
       "59  [henkel, afd, verurteilt, brandanschläge, asyl...   \n",
       "60  [gauland, slowaken, wehren, fremdbestimmung, b...   \n",
       "61  [gauland, migrantenzahlen, steigen, frau, merk...   \n",
       "62  [georg, pazderski, alter, vermeintlich, minder...   \n",
       "63  [alexander, gauland, million, migranten, steue...   \n",
       "\n",
       "                                           liststring  positive words  \\\n",
       "0   weidel,bundessozialgericht,liegt,sozialstaat,b...             104   \n",
       "1   petry,massenhafter,missbrauch,frauen,köln,erin...             482   \n",
       "2   poggenburg,mindestlohn,ausnahmeregelungen,asyl...             263   \n",
       "3   georg,pazderski,biologische,altersfeststellung...             106   \n",
       "4   alexander,gauland,asylkrise,überfordert,rechts...             105   \n",
       "..                                                ...             ...   \n",
       "59  henkel,afd,verurteilt,brandanschläge,asylbewer...              79   \n",
       "60  gauland,slowaken,wehren,fremdbestimmung,berlin...             180   \n",
       "61  gauland,migrantenzahlen,steigen,frau,merkel,bl...              37   \n",
       "62  georg,pazderski,alter,vermeintlich,minderjähri...             147   \n",
       "63  alexander,gauland,million,migranten,steuerzahl...              83   \n",
       "\n",
       "    negative words  \n",
       "0               51  \n",
       "1              364  \n",
       "2              219  \n",
       "3               64  \n",
       "4               92  \n",
       "..             ...  \n",
       "59              57  \n",
       "60             161  \n",
       "61              29  \n",
       "62             122  \n",
       "63              51  \n",
       "\n",
       "[64 rows x 6 columns]"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "afd_rel_final['positive words'] = afd_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "afd_rel_final['negative words'] = afd_rel_final['liststring'].str.count('|'.join(negative_list1))\n",
    "afd_rel_final\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "conditions = [\n",
    "(afd_rel_final['positive words'] > afd_rel_final['negative words']),\n",
    "(afd_rel_final['negative words'] > afd_rel_final['positive words']),\n",
    "(afd_rel_final['negative words'] == afd_rel_final['positive words'])\n",
    "]\n",
    "\n",
    "choices = [\n",
    "'positive',\n",
    "'negative',\n",
    "'neutral'\n",
    "]\n",
    "\n",
    "afd_rel_final['overall'] = np.select(conditions, choices, default = '')\n",
    "\n",
    "afd_rel_final\n",
    "\n",
    "#clean dataset \n",
    "afd_rel_final = afd_rel_final.rename(columns={'content': 'original_title_text', 'liststring': 'text_procssed_text',\"my\":\"month_year\",\"overall\":\"overall_sentiment\"})\n",
    "afd_rel_final.head()\n",
    "afd_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/afd_sentiment.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# German newspapers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Bild\n",
    "bild = pd.read_csv(\"/Users/ashrakatelshehawy/bild_relevant-migrant-news.csv\", encoding='utf-8', delimiter='\\t',header=None,  error_bad_lines=False)\n",
    "\n",
    "#remove unnecessary columns\n",
    "bild_rel = bild.drop(bild.columns[[0,1,5,6,7,8]], axis=1)\n",
    "#give column names\n",
    "bild_rel.columns = ['date', 'title',\"content\"]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>title</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>781</th>\n",
       "      <td>20180701</td>\n",
       "      <td>Themenseiten</td>\n",
       "      <td>Flüchtlinge                                 Se...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>20160615</td>\n",
       "      <td>Streit um sichere Herkunftsländer | Asylanträg...</td>\n",
       "      <td>Streit um sichere Herkunftsländer | Asylanträg...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>20160117</td>\n",
       "      <td>Merkel-Plan | Schnellere Abschiebung&lt;br /&gt;für ...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>856</th>\n",
       "      <td>20181210</td>\n",
       "      <td>Merkel beim Migrationspakt-Gipfel in Marrakesc...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>562</th>\n",
       "      <td>20140121</td>\n",
       "      <td>Blutiger Bürgerkrieg in Zentralafrika: EU schi...</td>\n",
       "      <td>Mehr Infos » Französische Soldaten patrouillie...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>277</th>\n",
       "      <td>20160510</td>\n",
       "      <td>Übergriffe auf Christen in Flüchtlingsheimen |...</td>\n",
       "      <td>Übergriffe auf Christen in Flüchtlingsheimen |...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>795</th>\n",
       "      <td>20171115</td>\n",
       "      <td>CNN filmt Sklavenhandel | Perverse Menschen-&lt;b...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>799</th>\n",
       "      <td>20161201</td>\n",
       "      <td>Bundespresseball | Flüchtlings-Satire sorgt fü...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>819</th>\n",
       "      <td>20160902</td>\n",
       "      <td>Morddrohung im Gericht | Mann (26) drohte Haft...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>20171208</td>\n",
       "      <td>Geheim-Bericht der EU | Immer mehr Flüchtlinge...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>535</th>\n",
       "      <td>20151231</td>\n",
       "      <td>Neujahrsansprache der Kanzlerin | „Ich sage vo...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>612</th>\n",
       "      <td>20180801</td>\n",
       "      <td>Düzen Tekkal zur Debatte um Özil und Rassismus...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>329</th>\n",
       "      <td>20180117</td>\n",
       "      <td>13 Geschwister aus Horror-Haus befreit | Kinde...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>20170206</td>\n",
       "      <td>Schockzahl 8 Millionen | Wie viele „Illegale“&lt;...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>807</th>\n",
       "      <td>20180615</td>\n",
       "      <td>Asyl-Zoff mit Merkel | Was plant Seehofer dafü...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>163</th>\n",
       "      <td>20150710</td>\n",
       "      <td>Endloser Krieg in Syrien | Erstmals über vier ...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>242</th>\n",
       "      <td>20160328</td>\n",
       "      <td>Terrorgefahr, Verbrechen, Unruhen | Das sind d...</td>\n",
       "      <td>Terrorgefahr, Verbrechen, Unruhen: Die 17 gefä...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>738</th>\n",
       "      <td>20180402</td>\n",
       "      <td>Ungarn wählt am 8. April | Wie Orbán mitFlücht...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>834</th>\n",
       "      <td>20181012</td>\n",
       "      <td>„Maybrit Illner“ wird zur „Folterstunde“ | Str...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>183</th>\n",
       "      <td>20170605</td>\n",
       "      <td>Eigentlich sollte sie steigen … | Zahl der Abs...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20160611</td>\n",
       "      <td>Politiker schlagen Alarm | Kinder-Ehe hatDeuts...</td>\n",
       "      <td>Kinder-Ehe hat Deutschland erreicht Glücklich ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>575</th>\n",
       "      <td>20170607</td>\n",
       "      <td>| Lieber Bürgermeister von London,</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>343</th>\n",
       "      <td>20161222</td>\n",
       "      <td>Propaganda-Show mit Twitter-Mädchen | Erdogan ...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>668</th>\n",
       "      <td>20161026</td>\n",
       "      <td>Weil sich seine Frau einem anderen Mann zuwand...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>480</th>\n",
       "      <td>20180826</td>\n",
       "      <td>„Freiheitsberaubung, Machtmissbrauch“ | Justiz...</td>\n",
       "      <td>Ihr Gerät unterstützt kein Javascript.Bitte ak...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date                                              title  \\\n",
       "781  20180701                                       Themenseiten   \n",
       "39   20160615  Streit um sichere Herkunftsländer | Asylanträg...   \n",
       "275  20160117  Merkel-Plan | Schnellere Abschiebung<br />für ...   \n",
       "856  20181210  Merkel beim Migrationspakt-Gipfel in Marrakesc...   \n",
       "562  20140121  Blutiger Bürgerkrieg in Zentralafrika: EU schi...   \n",
       "277  20160510  Übergriffe auf Christen in Flüchtlingsheimen |...   \n",
       "795  20171115  CNN filmt Sklavenhandel | Perverse Menschen-<b...   \n",
       "799  20161201  Bundespresseball | Flüchtlings-Satire sorgt fü...   \n",
       "819  20160902  Morddrohung im Gericht | Mann (26) drohte Haft...   \n",
       "48   20171208  Geheim-Bericht der EU | Immer mehr Flüchtlinge...   \n",
       "535  20151231  Neujahrsansprache der Kanzlerin | „Ich sage vo...   \n",
       "612  20180801  Düzen Tekkal zur Debatte um Özil und Rassismus...   \n",
       "329  20180117  13 Geschwister aus Horror-Haus befreit | Kinde...   \n",
       "88   20170206  Schockzahl 8 Millionen | Wie viele „Illegale“<...   \n",
       "807  20180615  Asyl-Zoff mit Merkel | Was plant Seehofer dafü...   \n",
       "163  20150710  Endloser Krieg in Syrien | Erstmals über vier ...   \n",
       "242  20160328  Terrorgefahr, Verbrechen, Unruhen | Das sind d...   \n",
       "738  20180402  Ungarn wählt am 8. April | Wie Orbán mitFlücht...   \n",
       "834  20181012  „Maybrit Illner“ wird zur „Folterstunde“ | Str...   \n",
       "183  20170605  Eigentlich sollte sie steigen … | Zahl der Abs...   \n",
       "0    20160611  Politiker schlagen Alarm | Kinder-Ehe hatDeuts...   \n",
       "575  20170607                 | Lieber Bürgermeister von London,   \n",
       "343  20161222  Propaganda-Show mit Twitter-Mädchen | Erdogan ...   \n",
       "668  20161026  Weil sich seine Frau einem anderen Mann zuwand...   \n",
       "480  20180826  „Freiheitsberaubung, Machtmissbrauch“ | Justiz...   \n",
       "\n",
       "                                               content  \n",
       "781  Flüchtlinge                                 Se...  \n",
       "39   Streit um sichere Herkunftsländer | Asylanträg...  \n",
       "275  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "856  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "562  Mehr Infos » Französische Soldaten patrouillie...  \n",
       "277  Übergriffe auf Christen in Flüchtlingsheimen |...  \n",
       "795  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "799  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "819  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "48   Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "535  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "612  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "329  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "88   Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "807  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "163  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "242  Terrorgefahr, Verbrechen, Unruhen: Die 17 gefä...  \n",
       "738  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "834  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "183  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "0    Kinder-Ehe hat Deutschland erreicht Glücklich ...  \n",
       "575  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "343  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "668  Ihr Gerät unterstützt kein Javascript.Bitte ak...  \n",
       "480  Ihr Gerät unterstützt kein Javascript.Bitte ak...  "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#random sample\n",
    "bild_rel_sample=bild_rel.sample(25)\n",
    "bild_rel_sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "#we add title to content\n",
    "bild_rel[\"content\"]=bild_rel[\"title\"]+[\" \"]+bild_rel[\"content\"]\n",
    "\n",
    "#drop unnessecary column\n",
    "bild_rel.drop(bild_rel.columns[1], axis=1, inplace=True)\n",
    "\n",
    "# lets remove the day because we dont need it\n",
    "bild_rel[\"date\"] = bild_rel[\"date\"].astype(str).str[:-2].astype(np.int64)\n",
    "\n",
    "#now we extract last two digits to change months structure\n",
    "#first change to charachter\n",
    "bild_rel['date1'] = bild_rel['date'].apply(str)\n",
    "\n",
    "#new column with months and years\n",
    "bild_rel['month'] = bild_rel['date1'].str[4:6]\n",
    "bild_rel['year'] = bild_rel['date1'].str[0:4]\n",
    "\n",
    "#same date structure as the other datasets\n",
    "bild_rel[\"my\"]=bild_rel[\"month\"]+bild_rel[\"year\"]\n",
    "bild_rel.head()\n",
    "\n",
    "#change month interger to month name\n",
    "import calendar\n",
    "\n",
    "#revert back to interger\n",
    "bild_rel['month'] = bild_rel['month'].apply(int)\n",
    "\n",
    "bild_rel2=bild_rel\n",
    "\n",
    "#use calendar to change month number to name\n",
    "bild_rel2['month'] = bild_rel2['month'].apply(lambda x: calendar.month_name[x])\n",
    "\n",
    "#have a consistent date variable for all datasets\n",
    "bild_rel2[\"month-year\"] = bild_rel2[\"month\"] +[\" \"]+ bild_rel2[\"year\"] \n",
    "bild_rel2.head()\n",
    "\n",
    "#aggregate over months\n",
    "bild_rel2=bild_rel2.groupby(['my'])['content'].apply(lambda x: ','.join(x)).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "#nlp pipeline text processing\n",
    "bild_rel2[\"nlpprocessed\"]=bild_rel2['content'].apply(nlp_pipeline)\n",
    "\n",
    "\n",
    "#convert nlpprocessed column to string\n",
    "bild_rel2['liststring'] = [','.join(map(str, l)) for l in bild_rel2['nlpprocessed']]\n",
    "bild_rel2['liststring'] = (bild_rel2['liststring'].replace(',',' ', regex=True))\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Migrationsbericht: Zahl der Zuwanderer auf höc...</td>\n",
       "      <td>[migrationsbericht, zahl, zuwanderer, höchstem...</td>\n",
       "      <td>migrationsbericht zahl zuwanderer höchstem sta...</td>\n",
       "      <td>239</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nach Massaker durch Islamisten in Paris | So s...</td>\n",
       "      <td>[massaker, islamisten, paris, schamlos, nutzen...</td>\n",
       "      <td>massaker islamisten paris schamlos nutzen afd ...</td>\n",
       "      <td>72</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Bei sexuellen Übergriffen und seriellen Straft...</td>\n",
       "      <td>[sexuellen, übergriffen, seriellen, straftaten...</td>\n",
       "      <td>sexuellen übergriffen seriellen straftaten sch...</td>\n",
       "      <td>1285</td>\n",
       "      <td>1117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Dramatisches Video von sinkendem Schiff | 1000...</td>\n",
       "      <td>[dramatisches, video, sinkendem, schiff, flüch...</td>\n",
       "      <td>dramatisches video sinkendem schiff flüchtling...</td>\n",
       "      <td>642</td>\n",
       "      <td>538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Bayerns Innenminister Joachim Herrmann (61, CS...</td>\n",
       "      <td>[bayerns, innenminister, joachim, herrmann, cs...</td>\n",
       "      <td>bayerns innenminister joachim herrmann csu jun...</td>\n",
       "      <td>659</td>\n",
       "      <td>593</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>022014</td>\n",
       "      <td>Maischberger-Talk: Euro-Hasser blamiert sich b...</td>\n",
       "      <td>[blamiert, maischberger, mehr, infos, bernd, l...</td>\n",
       "      <td>blamiert maischberger mehr infos bernd lucke m...</td>\n",
       "      <td>206</td>\n",
       "      <td>103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>022016</td>\n",
       "      <td>Abgelehnte Asylbewerber | Scharfe Kritik an Rü...</td>\n",
       "      <td>[abgelehnte, asylbewerber, scharfe, kritik, as...</td>\n",
       "      <td>abgelehnte asylbewerber scharfe kritik asylbew...</td>\n",
       "      <td>463</td>\n",
       "      <td>396</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>022017</td>\n",
       "      <td>Schockzahl 8 Millionen | Wie viele „Illegale“&lt;...</td>\n",
       "      <td>[schockzahl, millionen, viele, illegale, br, t...</td>\n",
       "      <td>schockzahl millionen viele illegale br trump r...</td>\n",
       "      <td>984</td>\n",
       "      <td>760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>022018</td>\n",
       "      <td>Autos und Haus beschmiert | Tafel-Chef droht m...</td>\n",
       "      <td>[autos, haus, beschmiert, droht, rücktritt, ge...</td>\n",
       "      <td>autos haus beschmiert droht rücktritt gerät un...</td>\n",
       "      <td>199</td>\n",
       "      <td>193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>032014</td>\n",
       "      <td>Angelina Jolie fordert Ende des Syrischen Bürg...</td>\n",
       "      <td>[angelina, jolie, fordert, ende, syrischen, bü...</td>\n",
       "      <td>angelina jolie fordert ende syrischen bürgerkr...</td>\n",
       "      <td>198</td>\n",
       "      <td>228</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>032015</td>\n",
       "      <td>Berufsschullehrer und seine Frau nehmen Flücht...</td>\n",
       "      <td>[berufsschullehrer, frau, nehmen, flüchtlinge,...</td>\n",
       "      <td>berufsschullehrer frau nehmen flüchtlinge syri...</td>\n",
       "      <td>52</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>032016</td>\n",
       "      <td>Einigung beim EU-Gipfel | Was der Flüchtlings-...</td>\n",
       "      <td>[einigung, beim, türkei, bringt, einigung, brü...</td>\n",
       "      <td>einigung beim türkei bringt einigung brüssel b...</td>\n",
       "      <td>235</td>\n",
       "      <td>224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>032017</td>\n",
       "      <td>In einer Zentralen Erstaufnahmeeinrichtung | M...</td>\n",
       "      <td>[zentralen, erstaufnahmeeinrichtung, mädchen, ...</td>\n",
       "      <td>zentralen erstaufnahmeeinrichtung mädchen fünf...</td>\n",
       "      <td>871</td>\n",
       "      <td>766</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>032018</td>\n",
       "      <td>Plädoyer im Fall Hussein K., dem Killer von St...</td>\n",
       "      <td>[plädoyer, fall, hussein, killer, studentin, m...</td>\n",
       "      <td>plädoyer fall hussein killer studentin maria s...</td>\n",
       "      <td>531</td>\n",
       "      <td>497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>042014</td>\n",
       "      <td>Leserbrief an die GdP: Polizisten schreibt übe...</td>\n",
       "      <td>[leserbrief, gdp, polizisten, schreibt, respek...</td>\n",
       "      <td>leserbrief gdp polizisten schreibt respektlosi...</td>\n",
       "      <td>77</td>\n",
       "      <td>59</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>042015</td>\n",
       "      <td>Notfall-Plan der Stadt | 800 Flüchtlinge ins O...</td>\n",
       "      <td>[stadt, flüchtlinge, gerät, unterstützt, aktiv...</td>\n",
       "      <td>stadt flüchtlinge gerät unterstützt aktivieren...</td>\n",
       "      <td>49</td>\n",
       "      <td>38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>042016</td>\n",
       "      <td>Koalition beschließt neue Regeln zu Asyl, Absc...</td>\n",
       "      <td>[koalition, beschließt, neue, regeln, asyl, ab...</td>\n",
       "      <td>koalition beschließt neue regeln asyl abschieb...</td>\n",
       "      <td>633</td>\n",
       "      <td>524</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>042017</td>\n",
       "      <td>Knapp 7 Prozent aller Straftäter sind Flüchtli...</td>\n",
       "      <td>[knapp, prozent, straftäter, flüchtlinge, wahr...</td>\n",
       "      <td>knapp prozent straftäter flüchtlinge wahrheit ...</td>\n",
       "      <td>457</td>\n",
       "      <td>458</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>042018</td>\n",
       "      <td>Durch Flüchtlingskrise | Jeder zehnte Hartz-Em...</td>\n",
       "      <td>[flüchtlingskrise, zehnte, stammt, syrien, ger...</td>\n",
       "      <td>flüchtlingskrise zehnte stammt syrien gerät un...</td>\n",
       "      <td>821</td>\n",
       "      <td>716</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>052014</td>\n",
       "      <td>Armutszuwanderer aus Ost-EU-Ländern: Hartz-Bez...</td>\n",
       "      <td>[armutszuwanderer, bulgarien, mehr, million, z...</td>\n",
       "      <td>armutszuwanderer bulgarien mehr million zuwand...</td>\n",
       "      <td>72</td>\n",
       "      <td>78</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>052015</td>\n",
       "      <td>Flüchtlinge Flüchtlinge                     Sc...</td>\n",
       "      <td>[flüchtlinge, flüchtlinge, schon, gewalt, berl...</td>\n",
       "      <td>flüchtlinge flüchtlinge schon gewalt berliner ...</td>\n",
       "      <td>14</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>052016</td>\n",
       "      <td>Die Chronologie der Abschottung | Ausländerrec...</td>\n",
       "      <td>[chronologie, abschottung, ausländerrecht, sch...</td>\n",
       "      <td>chronologie abschottung ausländerrecht scharf ...</td>\n",
       "      <td>723</td>\n",
       "      <td>706</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>052017</td>\n",
       "      <td>Rechte Bande jagte Asylbewerber am Elbufer | E...</td>\n",
       "      <td>[rechte, bande, jagte, asylbewerber, elbufer, ...</td>\n",
       "      <td>rechte bande jagte asylbewerber elbufer erster...</td>\n",
       "      <td>306</td>\n",
       "      <td>295</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>052018</td>\n",
       "      <td>Neues BKA-Lagebild mit überraschenden Fakten |...</td>\n",
       "      <td>[neues, überraschenden, fakten, kriminell, sin...</td>\n",
       "      <td>neues überraschenden fakten kriminell sindzuwa...</td>\n",
       "      <td>1230</td>\n",
       "      <td>974</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>062014</td>\n",
       "      <td>Flüchtlings-Tsunami überrollt Europa: Jeden Ta...</td>\n",
       "      <td>[überrollt, europa, tag, kommen, tausende, afr...</td>\n",
       "      <td>überrollt europa tag kommen tausende afrika vi...</td>\n",
       "      <td>45</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>062016</td>\n",
       "      <td>Politiker schlagen Alarm | Kinder-Ehe hatDeuts...</td>\n",
       "      <td>[politiker, schlagen, alarm, hatdeutschland, e...</td>\n",
       "      <td>politiker schlagen alarm hatdeutschland erreic...</td>\n",
       "      <td>1138</td>\n",
       "      <td>894</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>062017</td>\n",
       "      <td>| Keine Flucht in Hartz Ihr Gerät unterstützt ...</td>\n",
       "      <td>[flucht, hartz, gerät, unterstützt, aktivieren...</td>\n",
       "      <td>flucht hartz gerät unterstützt aktivieren java...</td>\n",
       "      <td>546</td>\n",
       "      <td>436</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>062018</td>\n",
       "      <td>Mörder, Vergewaltiger, Drogenhändler | Keiner ...</td>\n",
       "      <td>[mörder, vergewaltiger, drogenhändler, sagen, ...</td>\n",
       "      <td>mörder vergewaltiger drogenhändler sagen gerät...</td>\n",
       "      <td>3257</td>\n",
       "      <td>2002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>072014</td>\n",
       "      <td>Wieder Mädchen in Indien vergewaltigt? Lynchju...</td>\n",
       "      <td>[mädchen, indien, vergewaltigt, lynchjustiz, w...</td>\n",
       "      <td>mädchen indien vergewaltigt lynchjustiz wütend...</td>\n",
       "      <td>42</td>\n",
       "      <td>57</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>072015</td>\n",
       "      <td>| Der Fall Reem Ihr Gerät unterstützt kein Jav...</td>\n",
       "      <td>[fall, reem, gerät, unterstützt, aktivieren, j...</td>\n",
       "      <td>fall reem gerät unterstützt aktivieren javascr...</td>\n",
       "      <td>181</td>\n",
       "      <td>156</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>072016</td>\n",
       "      <td>Neuer AfD-Hass | Gauland will Asylrecht für Mu...</td>\n",
       "      <td>[neuer, gauland, asylrecht, muslime, aussetzen...</td>\n",
       "      <td>neuer gauland asylrecht muslime aussetzen neue...</td>\n",
       "      <td>1071</td>\n",
       "      <td>976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>072017</td>\n",
       "      <td>Rom will Häfen für Flüchtlinge schließen | Was...</td>\n",
       "      <td>[rom, häfen, flüchtlinge, schließen, passiert,...</td>\n",
       "      <td>rom häfen flüchtlinge schließen passiert itali...</td>\n",
       "      <td>651</td>\n",
       "      <td>478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>072018</td>\n",
       "      <td>„Wir wollen nicht sterben!“ | 270 000 Syrer fl...</td>\n",
       "      <td>[sterben, syrer, flehen, öffnung, jordanischer...</td>\n",
       "      <td>sterben syrer flehen öffnung jordanischer gren...</td>\n",
       "      <td>2843</td>\n",
       "      <td>1898</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>082015</td>\n",
       "      <td>Merkel macht Druck bei Flüchtlingspolitik | „A...</td>\n",
       "      <td>[merkel, macht, druck, flüchtlingspolitik, sch...</td>\n",
       "      <td>merkel macht druck flüchtlingspolitik schnell ...</td>\n",
       "      <td>344</td>\n",
       "      <td>346</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>082016</td>\n",
       "      <td>Zuwanderung | Jeder 4. Hartz-IV-Bezieher ist m...</td>\n",
       "      <td>[zuwanderung, mittlerweile, ausländer, zuwande...</td>\n",
       "      <td>zuwanderung mittlerweile ausländer zuwanderung...</td>\n",
       "      <td>848</td>\n",
       "      <td>671</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>082017</td>\n",
       "      <td>„Einwanderer-Kontingente“ | Merkel offen für&lt;b...</td>\n",
       "      <td>[merkel, offen, br, pflegekräfte, afrika, gerä...</td>\n",
       "      <td>merkel offen br pflegekräfte afrika gerät unte...</td>\n",
       "      <td>804</td>\n",
       "      <td>595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>082018</td>\n",
       "      <td>Vorstoß von Annegret Kramp-Karrenbauer | CDU w...</td>\n",
       "      <td>[vorstoß, annegret, cdu, dienstpflichtfür, flü...</td>\n",
       "      <td>vorstoß annegret cdu dienstpflichtfür flüchtli...</td>\n",
       "      <td>864</td>\n",
       "      <td>628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>092016</td>\n",
       "      <td>Unicef-Bericht | Weltweit 28 Millionen Kinder&lt;...</td>\n",
       "      <td>[weltweit, millionen, kinder, br, flucht, gerä...</td>\n",
       "      <td>weltweit millionen kinder br flucht gerät unte...</td>\n",
       "      <td>1126</td>\n",
       "      <td>954</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>092017</td>\n",
       "      <td>Die letzte Entscheidungshilfe | Alle Wahlprogr...</td>\n",
       "      <td>[letzte, entscheidungshilfe, wahlprogramme, br...</td>\n",
       "      <td>letzte entscheidungshilfe wahlprogramme br ger...</td>\n",
       "      <td>2596</td>\n",
       "      <td>1680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>092018</td>\n",
       "      <td>Nach Attacken in Frankfurt/oder | Linker Bürge...</td>\n",
       "      <td>[attacken, linker, bürgermeister, willintensiv...</td>\n",
       "      <td>attacken linker bürgermeister willintensivtäte...</td>\n",
       "      <td>653</td>\n",
       "      <td>482</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>102014</td>\n",
       "      <td>Traumatisiert im Syrien-Krieg | Aya (7) beginn...</td>\n",
       "      <td>[traumatisiert, aya, beginnt, tanzen, schüsse,...</td>\n",
       "      <td>traumatisiert aya beginnt tanzen schüsse hört ...</td>\n",
       "      <td>86</td>\n",
       "      <td>68</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>102015</td>\n",
       "      <td>In Berlin-Moabit | Mob attackiert Flüchtling a...</td>\n",
       "      <td>[mob, attackiert, flüchtling, offener, straße,...</td>\n",
       "      <td>mob attackiert flüchtling offener straße gerät...</td>\n",
       "      <td>227</td>\n",
       "      <td>286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>102016</td>\n",
       "      <td>Missbrauch, Zwangsehe, Genitalverstümmelungen ...</td>\n",
       "      <td>[missbrauch, zwangsehe, genitalverstümmelungen...</td>\n",
       "      <td>missbrauch zwangsehe genitalverstümmelungen ge...</td>\n",
       "      <td>1139</td>\n",
       "      <td>922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>102017</td>\n",
       "      <td>| Mühen und bemühen Ihr Gerät unterstützt kein...</td>\n",
       "      <td>[mühen, bemühen, gerät, unterstützt, aktiviere...</td>\n",
       "      <td>mühen bemühen gerät unterstützt aktivieren jav...</td>\n",
       "      <td>466</td>\n",
       "      <td>369</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>102018</td>\n",
       "      <td>Neue Zahlen | Weniger Menschen brauchen Sozial...</td>\n",
       "      <td>[neue, zahlen, weniger, menschen, brauchen, so...</td>\n",
       "      <td>neue zahlen weniger menschen brauchen sozialhi...</td>\n",
       "      <td>845</td>\n",
       "      <td>563</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>112015</td>\n",
       "      <td>Erst 116 von 160000 | Umsiedelung von Flüchtli...</td>\n",
       "      <td>[erst, umsiedelung, flüchtlingen, langsam, ger...</td>\n",
       "      <td>erst umsiedelung flüchtlingen langsam gerät un...</td>\n",
       "      <td>510</td>\n",
       "      <td>382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>112016</td>\n",
       "      <td>Perfide Masche der Schleuser | Flüchtlinge mit...</td>\n",
       "      <td>[perfide, masche, schleuser, flüchtlinge, ruhi...</td>\n",
       "      <td>perfide masche schleuser flüchtlinge ruhiggest...</td>\n",
       "      <td>728</td>\n",
       "      <td>623</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>112017</td>\n",
       "      <td>| Warum? Ihr Gerät unterstützt kein Javascript...</td>\n",
       "      <td>[warum, gerät, unterstützt, aktivieren, javasc...</td>\n",
       "      <td>warum gerät unterstützt aktivieren javascript ...</td>\n",
       "      <td>641</td>\n",
       "      <td>479</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>112018</td>\n",
       "      <td>Vergewaltiger wird zur Abschreckung in die Tür...</td>\n",
       "      <td>[vergewaltiger, abschreckung, türkei, abgescho...</td>\n",
       "      <td>vergewaltiger abschreckung türkei abgeschoben ...</td>\n",
       "      <td>1250</td>\n",
       "      <td>935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>122014</td>\n",
       "      <td>| Lieber Horst Seehofer, Ihr Gerät unterstützt...</td>\n",
       "      <td>[lieber, horst, seehofer, gerät, unterstützt, ...</td>\n",
       "      <td>lieber horst seehofer gerät unterstützt aktivi...</td>\n",
       "      <td>47</td>\n",
       "      <td>29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>122015</td>\n",
       "      <td>Erste Statistik | Die Wahrheit über kriminelle...</td>\n",
       "      <td>[erste, statistik, wahrheit, kriminelle, asylb...</td>\n",
       "      <td>erste statistik wahrheit kriminelle asylbewerb...</td>\n",
       "      <td>387</td>\n",
       "      <td>304</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>122016</td>\n",
       "      <td>Asylrecht-Streit vor Parteitag | CDU-Abgeordne...</td>\n",
       "      <td>[parteitag, fordern, br, kurskorrektur, merkel...</td>\n",
       "      <td>parteitag fordern br kurskorrektur merkel gerä...</td>\n",
       "      <td>1408</td>\n",
       "      <td>1266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>122017</td>\n",
       "      <td>Geheim-Bericht der EU | Immer mehr Flüchtlinge...</td>\n",
       "      <td>[eu, immer, mehr, flüchtlinge, br, nutzen, gef...</td>\n",
       "      <td>eu immer mehr flüchtlinge br nutzen gefälschte...</td>\n",
       "      <td>761</td>\n",
       "      <td>686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>122018</td>\n",
       "      <td>So ist die Lage auf dem Arbeitsmarkt wirklich ...</td>\n",
       "      <td>[lage, arbeitsmarkt, wirklich, zwei, drei, flü...</td>\n",
       "      <td>lage arbeitsmarkt wirklich zwei drei flüchtlin...</td>\n",
       "      <td>688</td>\n",
       "      <td>456</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content  \\\n",
       "0   012014  Migrationsbericht: Zahl der Zuwanderer auf höc...   \n",
       "1   012015  Nach Massaker durch Islamisten in Paris | So s...   \n",
       "2   012016  Bei sexuellen Übergriffen und seriellen Straft...   \n",
       "3   012017  Dramatisches Video von sinkendem Schiff | 1000...   \n",
       "4   012018  Bayerns Innenminister Joachim Herrmann (61, CS...   \n",
       "5   022014  Maischberger-Talk: Euro-Hasser blamiert sich b...   \n",
       "6   022016  Abgelehnte Asylbewerber | Scharfe Kritik an Rü...   \n",
       "7   022017  Schockzahl 8 Millionen | Wie viele „Illegale“<...   \n",
       "8   022018  Autos und Haus beschmiert | Tafel-Chef droht m...   \n",
       "9   032014  Angelina Jolie fordert Ende des Syrischen Bürg...   \n",
       "10  032015  Berufsschullehrer und seine Frau nehmen Flücht...   \n",
       "11  032016  Einigung beim EU-Gipfel | Was der Flüchtlings-...   \n",
       "12  032017  In einer Zentralen Erstaufnahmeeinrichtung | M...   \n",
       "13  032018  Plädoyer im Fall Hussein K., dem Killer von St...   \n",
       "14  042014  Leserbrief an die GdP: Polizisten schreibt übe...   \n",
       "15  042015  Notfall-Plan der Stadt | 800 Flüchtlinge ins O...   \n",
       "16  042016  Koalition beschließt neue Regeln zu Asyl, Absc...   \n",
       "17  042017  Knapp 7 Prozent aller Straftäter sind Flüchtli...   \n",
       "18  042018  Durch Flüchtlingskrise | Jeder zehnte Hartz-Em...   \n",
       "19  052014  Armutszuwanderer aus Ost-EU-Ländern: Hartz-Bez...   \n",
       "20  052015  Flüchtlinge Flüchtlinge                     Sc...   \n",
       "21  052016  Die Chronologie der Abschottung | Ausländerrec...   \n",
       "22  052017  Rechte Bande jagte Asylbewerber am Elbufer | E...   \n",
       "23  052018  Neues BKA-Lagebild mit überraschenden Fakten |...   \n",
       "24  062014  Flüchtlings-Tsunami überrollt Europa: Jeden Ta...   \n",
       "25  062016  Politiker schlagen Alarm | Kinder-Ehe hatDeuts...   \n",
       "26  062017  | Keine Flucht in Hartz Ihr Gerät unterstützt ...   \n",
       "27  062018  Mörder, Vergewaltiger, Drogenhändler | Keiner ...   \n",
       "28  072014  Wieder Mädchen in Indien vergewaltigt? Lynchju...   \n",
       "29  072015  | Der Fall Reem Ihr Gerät unterstützt kein Jav...   \n",
       "30  072016  Neuer AfD-Hass | Gauland will Asylrecht für Mu...   \n",
       "31  072017  Rom will Häfen für Flüchtlinge schließen | Was...   \n",
       "32  072018  „Wir wollen nicht sterben!“ | 270 000 Syrer fl...   \n",
       "33  082015  Merkel macht Druck bei Flüchtlingspolitik | „A...   \n",
       "34  082016  Zuwanderung | Jeder 4. Hartz-IV-Bezieher ist m...   \n",
       "35  082017  „Einwanderer-Kontingente“ | Merkel offen für<b...   \n",
       "36  082018  Vorstoß von Annegret Kramp-Karrenbauer | CDU w...   \n",
       "37  092016  Unicef-Bericht | Weltweit 28 Millionen Kinder<...   \n",
       "38  092017  Die letzte Entscheidungshilfe | Alle Wahlprogr...   \n",
       "39  092018  Nach Attacken in Frankfurt/oder | Linker Bürge...   \n",
       "40  102014  Traumatisiert im Syrien-Krieg | Aya (7) beginn...   \n",
       "41  102015  In Berlin-Moabit | Mob attackiert Flüchtling a...   \n",
       "42  102016  Missbrauch, Zwangsehe, Genitalverstümmelungen ...   \n",
       "43  102017  | Mühen und bemühen Ihr Gerät unterstützt kein...   \n",
       "44  102018  Neue Zahlen | Weniger Menschen brauchen Sozial...   \n",
       "45  112015  Erst 116 von 160000 | Umsiedelung von Flüchtli...   \n",
       "46  112016  Perfide Masche der Schleuser | Flüchtlinge mit...   \n",
       "47  112017  | Warum? Ihr Gerät unterstützt kein Javascript...   \n",
       "48  112018  Vergewaltiger wird zur Abschreckung in die Tür...   \n",
       "49  122014  | Lieber Horst Seehofer, Ihr Gerät unterstützt...   \n",
       "50  122015  Erste Statistik | Die Wahrheit über kriminelle...   \n",
       "51  122016  Asylrecht-Streit vor Parteitag | CDU-Abgeordne...   \n",
       "52  122017  Geheim-Bericht der EU | Immer mehr Flüchtlinge...   \n",
       "53  122018  So ist die Lage auf dem Arbeitsmarkt wirklich ...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [migrationsbericht, zahl, zuwanderer, höchstem...   \n",
       "1   [massaker, islamisten, paris, schamlos, nutzen...   \n",
       "2   [sexuellen, übergriffen, seriellen, straftaten...   \n",
       "3   [dramatisches, video, sinkendem, schiff, flüch...   \n",
       "4   [bayerns, innenminister, joachim, herrmann, cs...   \n",
       "5   [blamiert, maischberger, mehr, infos, bernd, l...   \n",
       "6   [abgelehnte, asylbewerber, scharfe, kritik, as...   \n",
       "7   [schockzahl, millionen, viele, illegale, br, t...   \n",
       "8   [autos, haus, beschmiert, droht, rücktritt, ge...   \n",
       "9   [angelina, jolie, fordert, ende, syrischen, bü...   \n",
       "10  [berufsschullehrer, frau, nehmen, flüchtlinge,...   \n",
       "11  [einigung, beim, türkei, bringt, einigung, brü...   \n",
       "12  [zentralen, erstaufnahmeeinrichtung, mädchen, ...   \n",
       "13  [plädoyer, fall, hussein, killer, studentin, m...   \n",
       "14  [leserbrief, gdp, polizisten, schreibt, respek...   \n",
       "15  [stadt, flüchtlinge, gerät, unterstützt, aktiv...   \n",
       "16  [koalition, beschließt, neue, regeln, asyl, ab...   \n",
       "17  [knapp, prozent, straftäter, flüchtlinge, wahr...   \n",
       "18  [flüchtlingskrise, zehnte, stammt, syrien, ger...   \n",
       "19  [armutszuwanderer, bulgarien, mehr, million, z...   \n",
       "20  [flüchtlinge, flüchtlinge, schon, gewalt, berl...   \n",
       "21  [chronologie, abschottung, ausländerrecht, sch...   \n",
       "22  [rechte, bande, jagte, asylbewerber, elbufer, ...   \n",
       "23  [neues, überraschenden, fakten, kriminell, sin...   \n",
       "24  [überrollt, europa, tag, kommen, tausende, afr...   \n",
       "25  [politiker, schlagen, alarm, hatdeutschland, e...   \n",
       "26  [flucht, hartz, gerät, unterstützt, aktivieren...   \n",
       "27  [mörder, vergewaltiger, drogenhändler, sagen, ...   \n",
       "28  [mädchen, indien, vergewaltigt, lynchjustiz, w...   \n",
       "29  [fall, reem, gerät, unterstützt, aktivieren, j...   \n",
       "30  [neuer, gauland, asylrecht, muslime, aussetzen...   \n",
       "31  [rom, häfen, flüchtlinge, schließen, passiert,...   \n",
       "32  [sterben, syrer, flehen, öffnung, jordanischer...   \n",
       "33  [merkel, macht, druck, flüchtlingspolitik, sch...   \n",
       "34  [zuwanderung, mittlerweile, ausländer, zuwande...   \n",
       "35  [merkel, offen, br, pflegekräfte, afrika, gerä...   \n",
       "36  [vorstoß, annegret, cdu, dienstpflichtfür, flü...   \n",
       "37  [weltweit, millionen, kinder, br, flucht, gerä...   \n",
       "38  [letzte, entscheidungshilfe, wahlprogramme, br...   \n",
       "39  [attacken, linker, bürgermeister, willintensiv...   \n",
       "40  [traumatisiert, aya, beginnt, tanzen, schüsse,...   \n",
       "41  [mob, attackiert, flüchtling, offener, straße,...   \n",
       "42  [missbrauch, zwangsehe, genitalverstümmelungen...   \n",
       "43  [mühen, bemühen, gerät, unterstützt, aktiviere...   \n",
       "44  [neue, zahlen, weniger, menschen, brauchen, so...   \n",
       "45  [erst, umsiedelung, flüchtlingen, langsam, ger...   \n",
       "46  [perfide, masche, schleuser, flüchtlinge, ruhi...   \n",
       "47  [warum, gerät, unterstützt, aktivieren, javasc...   \n",
       "48  [vergewaltiger, abschreckung, türkei, abgescho...   \n",
       "49  [lieber, horst, seehofer, gerät, unterstützt, ...   \n",
       "50  [erste, statistik, wahrheit, kriminelle, asylb...   \n",
       "51  [parteitag, fordern, br, kurskorrektur, merkel...   \n",
       "52  [eu, immer, mehr, flüchtlinge, br, nutzen, gef...   \n",
       "53  [lage, arbeitsmarkt, wirklich, zwei, drei, flü...   \n",
       "\n",
       "                                           liststring  positive words  \\\n",
       "0   migrationsbericht zahl zuwanderer höchstem sta...             239   \n",
       "1   massaker islamisten paris schamlos nutzen afd ...              72   \n",
       "2   sexuellen übergriffen seriellen straftaten sch...            1285   \n",
       "3   dramatisches video sinkendem schiff flüchtling...             642   \n",
       "4   bayerns innenminister joachim herrmann csu jun...             659   \n",
       "5   blamiert maischberger mehr infos bernd lucke m...             206   \n",
       "6   abgelehnte asylbewerber scharfe kritik asylbew...             463   \n",
       "7   schockzahl millionen viele illegale br trump r...             984   \n",
       "8   autos haus beschmiert droht rücktritt gerät un...             199   \n",
       "9   angelina jolie fordert ende syrischen bürgerkr...             198   \n",
       "10  berufsschullehrer frau nehmen flüchtlinge syri...              52   \n",
       "11  einigung beim türkei bringt einigung brüssel b...             235   \n",
       "12  zentralen erstaufnahmeeinrichtung mädchen fünf...             871   \n",
       "13  plädoyer fall hussein killer studentin maria s...             531   \n",
       "14  leserbrief gdp polizisten schreibt respektlosi...              77   \n",
       "15  stadt flüchtlinge gerät unterstützt aktivieren...              49   \n",
       "16  koalition beschließt neue regeln asyl abschieb...             633   \n",
       "17  knapp prozent straftäter flüchtlinge wahrheit ...             457   \n",
       "18  flüchtlingskrise zehnte stammt syrien gerät un...             821   \n",
       "19  armutszuwanderer bulgarien mehr million zuwand...              72   \n",
       "20  flüchtlinge flüchtlinge schon gewalt berliner ...              14   \n",
       "21  chronologie abschottung ausländerrecht scharf ...             723   \n",
       "22  rechte bande jagte asylbewerber elbufer erster...             306   \n",
       "23  neues überraschenden fakten kriminell sindzuwa...            1230   \n",
       "24  überrollt europa tag kommen tausende afrika vi...              45   \n",
       "25  politiker schlagen alarm hatdeutschland erreic...            1138   \n",
       "26  flucht hartz gerät unterstützt aktivieren java...             546   \n",
       "27  mörder vergewaltiger drogenhändler sagen gerät...            3257   \n",
       "28  mädchen indien vergewaltigt lynchjustiz wütend...              42   \n",
       "29  fall reem gerät unterstützt aktivieren javascr...             181   \n",
       "30  neuer gauland asylrecht muslime aussetzen neue...            1071   \n",
       "31  rom häfen flüchtlinge schließen passiert itali...             651   \n",
       "32  sterben syrer flehen öffnung jordanischer gren...            2843   \n",
       "33  merkel macht druck flüchtlingspolitik schnell ...             344   \n",
       "34  zuwanderung mittlerweile ausländer zuwanderung...             848   \n",
       "35  merkel offen br pflegekräfte afrika gerät unte...             804   \n",
       "36  vorstoß annegret cdu dienstpflichtfür flüchtli...             864   \n",
       "37  weltweit millionen kinder br flucht gerät unte...            1126   \n",
       "38  letzte entscheidungshilfe wahlprogramme br ger...            2596   \n",
       "39  attacken linker bürgermeister willintensivtäte...             653   \n",
       "40  traumatisiert aya beginnt tanzen schüsse hört ...              86   \n",
       "41  mob attackiert flüchtling offener straße gerät...             227   \n",
       "42  missbrauch zwangsehe genitalverstümmelungen ge...            1139   \n",
       "43  mühen bemühen gerät unterstützt aktivieren jav...             466   \n",
       "44  neue zahlen weniger menschen brauchen sozialhi...             845   \n",
       "45  erst umsiedelung flüchtlingen langsam gerät un...             510   \n",
       "46  perfide masche schleuser flüchtlinge ruhiggest...             728   \n",
       "47  warum gerät unterstützt aktivieren javascript ...             641   \n",
       "48  vergewaltiger abschreckung türkei abgeschoben ...            1250   \n",
       "49  lieber horst seehofer gerät unterstützt aktivi...              47   \n",
       "50  erste statistik wahrheit kriminelle asylbewerb...             387   \n",
       "51  parteitag fordern br kurskorrektur merkel gerä...            1408   \n",
       "52  eu immer mehr flüchtlinge br nutzen gefälschte...             761   \n",
       "53  lage arbeitsmarkt wirklich zwei drei flüchtlin...             688   \n",
       "\n",
       "    negative words  \n",
       "0               87  \n",
       "1               87  \n",
       "2             1117  \n",
       "3              538  \n",
       "4              593  \n",
       "5              103  \n",
       "6              396  \n",
       "7              760  \n",
       "8              193  \n",
       "9              228  \n",
       "10              33  \n",
       "11             224  \n",
       "12             766  \n",
       "13             497  \n",
       "14              59  \n",
       "15              38  \n",
       "16             524  \n",
       "17             458  \n",
       "18             716  \n",
       "19              78  \n",
       "20              17  \n",
       "21             706  \n",
       "22             295  \n",
       "23             974  \n",
       "24              55  \n",
       "25             894  \n",
       "26             436  \n",
       "27            2002  \n",
       "28              57  \n",
       "29             156  \n",
       "30             976  \n",
       "31             478  \n",
       "32            1898  \n",
       "33             346  \n",
       "34             671  \n",
       "35             595  \n",
       "36             628  \n",
       "37             954  \n",
       "38            1680  \n",
       "39             482  \n",
       "40              68  \n",
       "41             286  \n",
       "42             922  \n",
       "43             369  \n",
       "44             563  \n",
       "45             382  \n",
       "46             623  \n",
       "47             479  \n",
       "48             935  \n",
       "49              29  \n",
       "50             304  \n",
       "51            1266  \n",
       "52             686  \n",
       "53             456  "
      ]
     },
     "execution_count": 83,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bild_rel_final=bild_rel2\n",
    "bild_rel_final['positive words'] = bild_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "bild_rel_final['negative words'] = bild_rel_final['liststring'].str.count('|'.join(negative_list1))\n",
    "\n",
    "\n",
    "bild_rel_final\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Migrationsbericht: Zahl der Zuwanderer auf höc...</td>\n",
       "      <td>[migrationsbericht, zahl, zuwanderer, höchstem...</td>\n",
       "      <td>migrationsbericht zahl zuwanderer höchstem sta...</td>\n",
       "      <td>239</td>\n",
       "      <td>87</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nach Massaker durch Islamisten in Paris | So s...</td>\n",
       "      <td>[massaker, islamisten, paris, schamlos, nutzen...</td>\n",
       "      <td>massaker islamisten paris schamlos nutzen afd ...</td>\n",
       "      <td>72</td>\n",
       "      <td>87</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Bei sexuellen Übergriffen und seriellen Straft...</td>\n",
       "      <td>[sexuellen, übergriffen, seriellen, straftaten...</td>\n",
       "      <td>sexuellen übergriffen seriellen straftaten sch...</td>\n",
       "      <td>1285</td>\n",
       "      <td>1117</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Dramatisches Video von sinkendem Schiff | 1000...</td>\n",
       "      <td>[dramatisches, video, sinkendem, schiff, flüch...</td>\n",
       "      <td>dramatisches video sinkendem schiff flüchtling...</td>\n",
       "      <td>642</td>\n",
       "      <td>538</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Bayerns Innenminister Joachim Herrmann (61, CS...</td>\n",
       "      <td>[bayerns, innenminister, joachim, herrmann, cs...</td>\n",
       "      <td>bayerns innenminister joachim herrmann csu jun...</td>\n",
       "      <td>659</td>\n",
       "      <td>593</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>022014</td>\n",
       "      <td>Maischberger-Talk: Euro-Hasser blamiert sich b...</td>\n",
       "      <td>[blamiert, maischberger, mehr, infos, bernd, l...</td>\n",
       "      <td>blamiert maischberger mehr infos bernd lucke m...</td>\n",
       "      <td>206</td>\n",
       "      <td>103</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>022016</td>\n",
       "      <td>Abgelehnte Asylbewerber | Scharfe Kritik an Rü...</td>\n",
       "      <td>[abgelehnte, asylbewerber, scharfe, kritik, as...</td>\n",
       "      <td>abgelehnte asylbewerber scharfe kritik asylbew...</td>\n",
       "      <td>463</td>\n",
       "      <td>396</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>022017</td>\n",
       "      <td>Schockzahl 8 Millionen | Wie viele „Illegale“&lt;...</td>\n",
       "      <td>[schockzahl, millionen, viele, illegale, br, t...</td>\n",
       "      <td>schockzahl millionen viele illegale br trump r...</td>\n",
       "      <td>984</td>\n",
       "      <td>760</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>022018</td>\n",
       "      <td>Autos und Haus beschmiert | Tafel-Chef droht m...</td>\n",
       "      <td>[autos, haus, beschmiert, droht, rücktritt, ge...</td>\n",
       "      <td>autos haus beschmiert droht rücktritt gerät un...</td>\n",
       "      <td>199</td>\n",
       "      <td>193</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>032014</td>\n",
       "      <td>Angelina Jolie fordert Ende des Syrischen Bürg...</td>\n",
       "      <td>[angelina, jolie, fordert, ende, syrischen, bü...</td>\n",
       "      <td>angelina jolie fordert ende syrischen bürgerkr...</td>\n",
       "      <td>198</td>\n",
       "      <td>228</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>032015</td>\n",
       "      <td>Berufsschullehrer und seine Frau nehmen Flücht...</td>\n",
       "      <td>[berufsschullehrer, frau, nehmen, flüchtlinge,...</td>\n",
       "      <td>berufsschullehrer frau nehmen flüchtlinge syri...</td>\n",
       "      <td>52</td>\n",
       "      <td>33</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>032016</td>\n",
       "      <td>Einigung beim EU-Gipfel | Was der Flüchtlings-...</td>\n",
       "      <td>[einigung, beim, türkei, bringt, einigung, brü...</td>\n",
       "      <td>einigung beim türkei bringt einigung brüssel b...</td>\n",
       "      <td>235</td>\n",
       "      <td>224</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>032017</td>\n",
       "      <td>In einer Zentralen Erstaufnahmeeinrichtung | M...</td>\n",
       "      <td>[zentralen, erstaufnahmeeinrichtung, mädchen, ...</td>\n",
       "      <td>zentralen erstaufnahmeeinrichtung mädchen fünf...</td>\n",
       "      <td>871</td>\n",
       "      <td>766</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>032018</td>\n",
       "      <td>Plädoyer im Fall Hussein K., dem Killer von St...</td>\n",
       "      <td>[plädoyer, fall, hussein, killer, studentin, m...</td>\n",
       "      <td>plädoyer fall hussein killer studentin maria s...</td>\n",
       "      <td>531</td>\n",
       "      <td>497</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>042014</td>\n",
       "      <td>Leserbrief an die GdP: Polizisten schreibt übe...</td>\n",
       "      <td>[leserbrief, gdp, polizisten, schreibt, respek...</td>\n",
       "      <td>leserbrief gdp polizisten schreibt respektlosi...</td>\n",
       "      <td>77</td>\n",
       "      <td>59</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>042015</td>\n",
       "      <td>Notfall-Plan der Stadt | 800 Flüchtlinge ins O...</td>\n",
       "      <td>[stadt, flüchtlinge, gerät, unterstützt, aktiv...</td>\n",
       "      <td>stadt flüchtlinge gerät unterstützt aktivieren...</td>\n",
       "      <td>49</td>\n",
       "      <td>38</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>042016</td>\n",
       "      <td>Koalition beschließt neue Regeln zu Asyl, Absc...</td>\n",
       "      <td>[koalition, beschließt, neue, regeln, asyl, ab...</td>\n",
       "      <td>koalition beschließt neue regeln asyl abschieb...</td>\n",
       "      <td>633</td>\n",
       "      <td>524</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>042017</td>\n",
       "      <td>Knapp 7 Prozent aller Straftäter sind Flüchtli...</td>\n",
       "      <td>[knapp, prozent, straftäter, flüchtlinge, wahr...</td>\n",
       "      <td>knapp prozent straftäter flüchtlinge wahrheit ...</td>\n",
       "      <td>457</td>\n",
       "      <td>458</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>042018</td>\n",
       "      <td>Durch Flüchtlingskrise | Jeder zehnte Hartz-Em...</td>\n",
       "      <td>[flüchtlingskrise, zehnte, stammt, syrien, ger...</td>\n",
       "      <td>flüchtlingskrise zehnte stammt syrien gerät un...</td>\n",
       "      <td>821</td>\n",
       "      <td>716</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>052014</td>\n",
       "      <td>Armutszuwanderer aus Ost-EU-Ländern: Hartz-Bez...</td>\n",
       "      <td>[armutszuwanderer, bulgarien, mehr, million, z...</td>\n",
       "      <td>armutszuwanderer bulgarien mehr million zuwand...</td>\n",
       "      <td>72</td>\n",
       "      <td>78</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>052015</td>\n",
       "      <td>Flüchtlinge Flüchtlinge                     Sc...</td>\n",
       "      <td>[flüchtlinge, flüchtlinge, schon, gewalt, berl...</td>\n",
       "      <td>flüchtlinge flüchtlinge schon gewalt berliner ...</td>\n",
       "      <td>14</td>\n",
       "      <td>17</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>052016</td>\n",
       "      <td>Die Chronologie der Abschottung | Ausländerrec...</td>\n",
       "      <td>[chronologie, abschottung, ausländerrecht, sch...</td>\n",
       "      <td>chronologie abschottung ausländerrecht scharf ...</td>\n",
       "      <td>723</td>\n",
       "      <td>706</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>052017</td>\n",
       "      <td>Rechte Bande jagte Asylbewerber am Elbufer | E...</td>\n",
       "      <td>[rechte, bande, jagte, asylbewerber, elbufer, ...</td>\n",
       "      <td>rechte bande jagte asylbewerber elbufer erster...</td>\n",
       "      <td>306</td>\n",
       "      <td>295</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>052018</td>\n",
       "      <td>Neues BKA-Lagebild mit überraschenden Fakten |...</td>\n",
       "      <td>[neues, überraschenden, fakten, kriminell, sin...</td>\n",
       "      <td>neues überraschenden fakten kriminell sindzuwa...</td>\n",
       "      <td>1230</td>\n",
       "      <td>974</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>062014</td>\n",
       "      <td>Flüchtlings-Tsunami überrollt Europa: Jeden Ta...</td>\n",
       "      <td>[überrollt, europa, tag, kommen, tausende, afr...</td>\n",
       "      <td>überrollt europa tag kommen tausende afrika vi...</td>\n",
       "      <td>45</td>\n",
       "      <td>55</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>062016</td>\n",
       "      <td>Politiker schlagen Alarm | Kinder-Ehe hatDeuts...</td>\n",
       "      <td>[politiker, schlagen, alarm, hatdeutschland, e...</td>\n",
       "      <td>politiker schlagen alarm hatdeutschland erreic...</td>\n",
       "      <td>1138</td>\n",
       "      <td>894</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>062017</td>\n",
       "      <td>| Keine Flucht in Hartz Ihr Gerät unterstützt ...</td>\n",
       "      <td>[flucht, hartz, gerät, unterstützt, aktivieren...</td>\n",
       "      <td>flucht hartz gerät unterstützt aktivieren java...</td>\n",
       "      <td>546</td>\n",
       "      <td>436</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>062018</td>\n",
       "      <td>Mörder, Vergewaltiger, Drogenhändler | Keiner ...</td>\n",
       "      <td>[mörder, vergewaltiger, drogenhändler, sagen, ...</td>\n",
       "      <td>mörder vergewaltiger drogenhändler sagen gerät...</td>\n",
       "      <td>3257</td>\n",
       "      <td>2002</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>072014</td>\n",
       "      <td>Wieder Mädchen in Indien vergewaltigt? Lynchju...</td>\n",
       "      <td>[mädchen, indien, vergewaltigt, lynchjustiz, w...</td>\n",
       "      <td>mädchen indien vergewaltigt lynchjustiz wütend...</td>\n",
       "      <td>42</td>\n",
       "      <td>57</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>072015</td>\n",
       "      <td>| Der Fall Reem Ihr Gerät unterstützt kein Jav...</td>\n",
       "      <td>[fall, reem, gerät, unterstützt, aktivieren, j...</td>\n",
       "      <td>fall reem gerät unterstützt aktivieren javascr...</td>\n",
       "      <td>181</td>\n",
       "      <td>156</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>072016</td>\n",
       "      <td>Neuer AfD-Hass | Gauland will Asylrecht für Mu...</td>\n",
       "      <td>[neuer, gauland, asylrecht, muslime, aussetzen...</td>\n",
       "      <td>neuer gauland asylrecht muslime aussetzen neue...</td>\n",
       "      <td>1071</td>\n",
       "      <td>976</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>072017</td>\n",
       "      <td>Rom will Häfen für Flüchtlinge schließen | Was...</td>\n",
       "      <td>[rom, häfen, flüchtlinge, schließen, passiert,...</td>\n",
       "      <td>rom häfen flüchtlinge schließen passiert itali...</td>\n",
       "      <td>651</td>\n",
       "      <td>478</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>072018</td>\n",
       "      <td>„Wir wollen nicht sterben!“ | 270 000 Syrer fl...</td>\n",
       "      <td>[sterben, syrer, flehen, öffnung, jordanischer...</td>\n",
       "      <td>sterben syrer flehen öffnung jordanischer gren...</td>\n",
       "      <td>2843</td>\n",
       "      <td>1898</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>082015</td>\n",
       "      <td>Merkel macht Druck bei Flüchtlingspolitik | „A...</td>\n",
       "      <td>[merkel, macht, druck, flüchtlingspolitik, sch...</td>\n",
       "      <td>merkel macht druck flüchtlingspolitik schnell ...</td>\n",
       "      <td>344</td>\n",
       "      <td>346</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>082016</td>\n",
       "      <td>Zuwanderung | Jeder 4. Hartz-IV-Bezieher ist m...</td>\n",
       "      <td>[zuwanderung, mittlerweile, ausländer, zuwande...</td>\n",
       "      <td>zuwanderung mittlerweile ausländer zuwanderung...</td>\n",
       "      <td>848</td>\n",
       "      <td>671</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>082017</td>\n",
       "      <td>„Einwanderer-Kontingente“ | Merkel offen für&lt;b...</td>\n",
       "      <td>[merkel, offen, br, pflegekräfte, afrika, gerä...</td>\n",
       "      <td>merkel offen br pflegekräfte afrika gerät unte...</td>\n",
       "      <td>804</td>\n",
       "      <td>595</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>082018</td>\n",
       "      <td>Vorstoß von Annegret Kramp-Karrenbauer | CDU w...</td>\n",
       "      <td>[vorstoß, annegret, cdu, dienstpflichtfür, flü...</td>\n",
       "      <td>vorstoß annegret cdu dienstpflichtfür flüchtli...</td>\n",
       "      <td>864</td>\n",
       "      <td>628</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>092016</td>\n",
       "      <td>Unicef-Bericht | Weltweit 28 Millionen Kinder&lt;...</td>\n",
       "      <td>[weltweit, millionen, kinder, br, flucht, gerä...</td>\n",
       "      <td>weltweit millionen kinder br flucht gerät unte...</td>\n",
       "      <td>1126</td>\n",
       "      <td>954</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>092017</td>\n",
       "      <td>Die letzte Entscheidungshilfe | Alle Wahlprogr...</td>\n",
       "      <td>[letzte, entscheidungshilfe, wahlprogramme, br...</td>\n",
       "      <td>letzte entscheidungshilfe wahlprogramme br ger...</td>\n",
       "      <td>2596</td>\n",
       "      <td>1680</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>092018</td>\n",
       "      <td>Nach Attacken in Frankfurt/oder | Linker Bürge...</td>\n",
       "      <td>[attacken, linker, bürgermeister, willintensiv...</td>\n",
       "      <td>attacken linker bürgermeister willintensivtäte...</td>\n",
       "      <td>653</td>\n",
       "      <td>482</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>102014</td>\n",
       "      <td>Traumatisiert im Syrien-Krieg | Aya (7) beginn...</td>\n",
       "      <td>[traumatisiert, aya, beginnt, tanzen, schüsse,...</td>\n",
       "      <td>traumatisiert aya beginnt tanzen schüsse hört ...</td>\n",
       "      <td>86</td>\n",
       "      <td>68</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>102015</td>\n",
       "      <td>In Berlin-Moabit | Mob attackiert Flüchtling a...</td>\n",
       "      <td>[mob, attackiert, flüchtling, offener, straße,...</td>\n",
       "      <td>mob attackiert flüchtling offener straße gerät...</td>\n",
       "      <td>227</td>\n",
       "      <td>286</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>102016</td>\n",
       "      <td>Missbrauch, Zwangsehe, Genitalverstümmelungen ...</td>\n",
       "      <td>[missbrauch, zwangsehe, genitalverstümmelungen...</td>\n",
       "      <td>missbrauch zwangsehe genitalverstümmelungen ge...</td>\n",
       "      <td>1139</td>\n",
       "      <td>922</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>102017</td>\n",
       "      <td>| Mühen und bemühen Ihr Gerät unterstützt kein...</td>\n",
       "      <td>[mühen, bemühen, gerät, unterstützt, aktiviere...</td>\n",
       "      <td>mühen bemühen gerät unterstützt aktivieren jav...</td>\n",
       "      <td>466</td>\n",
       "      <td>369</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>102018</td>\n",
       "      <td>Neue Zahlen | Weniger Menschen brauchen Sozial...</td>\n",
       "      <td>[neue, zahlen, weniger, menschen, brauchen, so...</td>\n",
       "      <td>neue zahlen weniger menschen brauchen sozialhi...</td>\n",
       "      <td>845</td>\n",
       "      <td>563</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>112015</td>\n",
       "      <td>Erst 116 von 160000 | Umsiedelung von Flüchtli...</td>\n",
       "      <td>[erst, umsiedelung, flüchtlingen, langsam, ger...</td>\n",
       "      <td>erst umsiedelung flüchtlingen langsam gerät un...</td>\n",
       "      <td>510</td>\n",
       "      <td>382</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>112016</td>\n",
       "      <td>Perfide Masche der Schleuser | Flüchtlinge mit...</td>\n",
       "      <td>[perfide, masche, schleuser, flüchtlinge, ruhi...</td>\n",
       "      <td>perfide masche schleuser flüchtlinge ruhiggest...</td>\n",
       "      <td>728</td>\n",
       "      <td>623</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>112017</td>\n",
       "      <td>| Warum? Ihr Gerät unterstützt kein Javascript...</td>\n",
       "      <td>[warum, gerät, unterstützt, aktivieren, javasc...</td>\n",
       "      <td>warum gerät unterstützt aktivieren javascript ...</td>\n",
       "      <td>641</td>\n",
       "      <td>479</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>112018</td>\n",
       "      <td>Vergewaltiger wird zur Abschreckung in die Tür...</td>\n",
       "      <td>[vergewaltiger, abschreckung, türkei, abgescho...</td>\n",
       "      <td>vergewaltiger abschreckung türkei abgeschoben ...</td>\n",
       "      <td>1250</td>\n",
       "      <td>935</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>122014</td>\n",
       "      <td>| Lieber Horst Seehofer, Ihr Gerät unterstützt...</td>\n",
       "      <td>[lieber, horst, seehofer, gerät, unterstützt, ...</td>\n",
       "      <td>lieber horst seehofer gerät unterstützt aktivi...</td>\n",
       "      <td>47</td>\n",
       "      <td>29</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>122015</td>\n",
       "      <td>Erste Statistik | Die Wahrheit über kriminelle...</td>\n",
       "      <td>[erste, statistik, wahrheit, kriminelle, asylb...</td>\n",
       "      <td>erste statistik wahrheit kriminelle asylbewerb...</td>\n",
       "      <td>387</td>\n",
       "      <td>304</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>122016</td>\n",
       "      <td>Asylrecht-Streit vor Parteitag | CDU-Abgeordne...</td>\n",
       "      <td>[parteitag, fordern, br, kurskorrektur, merkel...</td>\n",
       "      <td>parteitag fordern br kurskorrektur merkel gerä...</td>\n",
       "      <td>1408</td>\n",
       "      <td>1266</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>122017</td>\n",
       "      <td>Geheim-Bericht der EU | Immer mehr Flüchtlinge...</td>\n",
       "      <td>[eu, immer, mehr, flüchtlinge, br, nutzen, gef...</td>\n",
       "      <td>eu immer mehr flüchtlinge br nutzen gefälschte...</td>\n",
       "      <td>761</td>\n",
       "      <td>686</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>122018</td>\n",
       "      <td>So ist die Lage auf dem Arbeitsmarkt wirklich ...</td>\n",
       "      <td>[lage, arbeitsmarkt, wirklich, zwei, drei, flü...</td>\n",
       "      <td>lage arbeitsmarkt wirklich zwei drei flüchtlin...</td>\n",
       "      <td>688</td>\n",
       "      <td>456</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content  \\\n",
       "0   012014  Migrationsbericht: Zahl der Zuwanderer auf höc...   \n",
       "1   012015  Nach Massaker durch Islamisten in Paris | So s...   \n",
       "2   012016  Bei sexuellen Übergriffen und seriellen Straft...   \n",
       "3   012017  Dramatisches Video von sinkendem Schiff | 1000...   \n",
       "4   012018  Bayerns Innenminister Joachim Herrmann (61, CS...   \n",
       "5   022014  Maischberger-Talk: Euro-Hasser blamiert sich b...   \n",
       "6   022016  Abgelehnte Asylbewerber | Scharfe Kritik an Rü...   \n",
       "7   022017  Schockzahl 8 Millionen | Wie viele „Illegale“<...   \n",
       "8   022018  Autos und Haus beschmiert | Tafel-Chef droht m...   \n",
       "9   032014  Angelina Jolie fordert Ende des Syrischen Bürg...   \n",
       "10  032015  Berufsschullehrer und seine Frau nehmen Flücht...   \n",
       "11  032016  Einigung beim EU-Gipfel | Was der Flüchtlings-...   \n",
       "12  032017  In einer Zentralen Erstaufnahmeeinrichtung | M...   \n",
       "13  032018  Plädoyer im Fall Hussein K., dem Killer von St...   \n",
       "14  042014  Leserbrief an die GdP: Polizisten schreibt übe...   \n",
       "15  042015  Notfall-Plan der Stadt | 800 Flüchtlinge ins O...   \n",
       "16  042016  Koalition beschließt neue Regeln zu Asyl, Absc...   \n",
       "17  042017  Knapp 7 Prozent aller Straftäter sind Flüchtli...   \n",
       "18  042018  Durch Flüchtlingskrise | Jeder zehnte Hartz-Em...   \n",
       "19  052014  Armutszuwanderer aus Ost-EU-Ländern: Hartz-Bez...   \n",
       "20  052015  Flüchtlinge Flüchtlinge                     Sc...   \n",
       "21  052016  Die Chronologie der Abschottung | Ausländerrec...   \n",
       "22  052017  Rechte Bande jagte Asylbewerber am Elbufer | E...   \n",
       "23  052018  Neues BKA-Lagebild mit überraschenden Fakten |...   \n",
       "24  062014  Flüchtlings-Tsunami überrollt Europa: Jeden Ta...   \n",
       "25  062016  Politiker schlagen Alarm | Kinder-Ehe hatDeuts...   \n",
       "26  062017  | Keine Flucht in Hartz Ihr Gerät unterstützt ...   \n",
       "27  062018  Mörder, Vergewaltiger, Drogenhändler | Keiner ...   \n",
       "28  072014  Wieder Mädchen in Indien vergewaltigt? Lynchju...   \n",
       "29  072015  | Der Fall Reem Ihr Gerät unterstützt kein Jav...   \n",
       "30  072016  Neuer AfD-Hass | Gauland will Asylrecht für Mu...   \n",
       "31  072017  Rom will Häfen für Flüchtlinge schließen | Was...   \n",
       "32  072018  „Wir wollen nicht sterben!“ | 270 000 Syrer fl...   \n",
       "33  082015  Merkel macht Druck bei Flüchtlingspolitik | „A...   \n",
       "34  082016  Zuwanderung | Jeder 4. Hartz-IV-Bezieher ist m...   \n",
       "35  082017  „Einwanderer-Kontingente“ | Merkel offen für<b...   \n",
       "36  082018  Vorstoß von Annegret Kramp-Karrenbauer | CDU w...   \n",
       "37  092016  Unicef-Bericht | Weltweit 28 Millionen Kinder<...   \n",
       "38  092017  Die letzte Entscheidungshilfe | Alle Wahlprogr...   \n",
       "39  092018  Nach Attacken in Frankfurt/oder | Linker Bürge...   \n",
       "40  102014  Traumatisiert im Syrien-Krieg | Aya (7) beginn...   \n",
       "41  102015  In Berlin-Moabit | Mob attackiert Flüchtling a...   \n",
       "42  102016  Missbrauch, Zwangsehe, Genitalverstümmelungen ...   \n",
       "43  102017  | Mühen und bemühen Ihr Gerät unterstützt kein...   \n",
       "44  102018  Neue Zahlen | Weniger Menschen brauchen Sozial...   \n",
       "45  112015  Erst 116 von 160000 | Umsiedelung von Flüchtli...   \n",
       "46  112016  Perfide Masche der Schleuser | Flüchtlinge mit...   \n",
       "47  112017  | Warum? Ihr Gerät unterstützt kein Javascript...   \n",
       "48  112018  Vergewaltiger wird zur Abschreckung in die Tür...   \n",
       "49  122014  | Lieber Horst Seehofer, Ihr Gerät unterstützt...   \n",
       "50  122015  Erste Statistik | Die Wahrheit über kriminelle...   \n",
       "51  122016  Asylrecht-Streit vor Parteitag | CDU-Abgeordne...   \n",
       "52  122017  Geheim-Bericht der EU | Immer mehr Flüchtlinge...   \n",
       "53  122018  So ist die Lage auf dem Arbeitsmarkt wirklich ...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [migrationsbericht, zahl, zuwanderer, höchstem...   \n",
       "1   [massaker, islamisten, paris, schamlos, nutzen...   \n",
       "2   [sexuellen, übergriffen, seriellen, straftaten...   \n",
       "3   [dramatisches, video, sinkendem, schiff, flüch...   \n",
       "4   [bayerns, innenminister, joachim, herrmann, cs...   \n",
       "5   [blamiert, maischberger, mehr, infos, bernd, l...   \n",
       "6   [abgelehnte, asylbewerber, scharfe, kritik, as...   \n",
       "7   [schockzahl, millionen, viele, illegale, br, t...   \n",
       "8   [autos, haus, beschmiert, droht, rücktritt, ge...   \n",
       "9   [angelina, jolie, fordert, ende, syrischen, bü...   \n",
       "10  [berufsschullehrer, frau, nehmen, flüchtlinge,...   \n",
       "11  [einigung, beim, türkei, bringt, einigung, brü...   \n",
       "12  [zentralen, erstaufnahmeeinrichtung, mädchen, ...   \n",
       "13  [plädoyer, fall, hussein, killer, studentin, m...   \n",
       "14  [leserbrief, gdp, polizisten, schreibt, respek...   \n",
       "15  [stadt, flüchtlinge, gerät, unterstützt, aktiv...   \n",
       "16  [koalition, beschließt, neue, regeln, asyl, ab...   \n",
       "17  [knapp, prozent, straftäter, flüchtlinge, wahr...   \n",
       "18  [flüchtlingskrise, zehnte, stammt, syrien, ger...   \n",
       "19  [armutszuwanderer, bulgarien, mehr, million, z...   \n",
       "20  [flüchtlinge, flüchtlinge, schon, gewalt, berl...   \n",
       "21  [chronologie, abschottung, ausländerrecht, sch...   \n",
       "22  [rechte, bande, jagte, asylbewerber, elbufer, ...   \n",
       "23  [neues, überraschenden, fakten, kriminell, sin...   \n",
       "24  [überrollt, europa, tag, kommen, tausende, afr...   \n",
       "25  [politiker, schlagen, alarm, hatdeutschland, e...   \n",
       "26  [flucht, hartz, gerät, unterstützt, aktivieren...   \n",
       "27  [mörder, vergewaltiger, drogenhändler, sagen, ...   \n",
       "28  [mädchen, indien, vergewaltigt, lynchjustiz, w...   \n",
       "29  [fall, reem, gerät, unterstützt, aktivieren, j...   \n",
       "30  [neuer, gauland, asylrecht, muslime, aussetzen...   \n",
       "31  [rom, häfen, flüchtlinge, schließen, passiert,...   \n",
       "32  [sterben, syrer, flehen, öffnung, jordanischer...   \n",
       "33  [merkel, macht, druck, flüchtlingspolitik, sch...   \n",
       "34  [zuwanderung, mittlerweile, ausländer, zuwande...   \n",
       "35  [merkel, offen, br, pflegekräfte, afrika, gerä...   \n",
       "36  [vorstoß, annegret, cdu, dienstpflichtfür, flü...   \n",
       "37  [weltweit, millionen, kinder, br, flucht, gerä...   \n",
       "38  [letzte, entscheidungshilfe, wahlprogramme, br...   \n",
       "39  [attacken, linker, bürgermeister, willintensiv...   \n",
       "40  [traumatisiert, aya, beginnt, tanzen, schüsse,...   \n",
       "41  [mob, attackiert, flüchtling, offener, straße,...   \n",
       "42  [missbrauch, zwangsehe, genitalverstümmelungen...   \n",
       "43  [mühen, bemühen, gerät, unterstützt, aktiviere...   \n",
       "44  [neue, zahlen, weniger, menschen, brauchen, so...   \n",
       "45  [erst, umsiedelung, flüchtlingen, langsam, ger...   \n",
       "46  [perfide, masche, schleuser, flüchtlinge, ruhi...   \n",
       "47  [warum, gerät, unterstützt, aktivieren, javasc...   \n",
       "48  [vergewaltiger, abschreckung, türkei, abgescho...   \n",
       "49  [lieber, horst, seehofer, gerät, unterstützt, ...   \n",
       "50  [erste, statistik, wahrheit, kriminelle, asylb...   \n",
       "51  [parteitag, fordern, br, kurskorrektur, merkel...   \n",
       "52  [eu, immer, mehr, flüchtlinge, br, nutzen, gef...   \n",
       "53  [lage, arbeitsmarkt, wirklich, zwei, drei, flü...   \n",
       "\n",
       "                                           liststring  positive words  \\\n",
       "0   migrationsbericht zahl zuwanderer höchstem sta...             239   \n",
       "1   massaker islamisten paris schamlos nutzen afd ...              72   \n",
       "2   sexuellen übergriffen seriellen straftaten sch...            1285   \n",
       "3   dramatisches video sinkendem schiff flüchtling...             642   \n",
       "4   bayerns innenminister joachim herrmann csu jun...             659   \n",
       "5   blamiert maischberger mehr infos bernd lucke m...             206   \n",
       "6   abgelehnte asylbewerber scharfe kritik asylbew...             463   \n",
       "7   schockzahl millionen viele illegale br trump r...             984   \n",
       "8   autos haus beschmiert droht rücktritt gerät un...             199   \n",
       "9   angelina jolie fordert ende syrischen bürgerkr...             198   \n",
       "10  berufsschullehrer frau nehmen flüchtlinge syri...              52   \n",
       "11  einigung beim türkei bringt einigung brüssel b...             235   \n",
       "12  zentralen erstaufnahmeeinrichtung mädchen fünf...             871   \n",
       "13  plädoyer fall hussein killer studentin maria s...             531   \n",
       "14  leserbrief gdp polizisten schreibt respektlosi...              77   \n",
       "15  stadt flüchtlinge gerät unterstützt aktivieren...              49   \n",
       "16  koalition beschließt neue regeln asyl abschieb...             633   \n",
       "17  knapp prozent straftäter flüchtlinge wahrheit ...             457   \n",
       "18  flüchtlingskrise zehnte stammt syrien gerät un...             821   \n",
       "19  armutszuwanderer bulgarien mehr million zuwand...              72   \n",
       "20  flüchtlinge flüchtlinge schon gewalt berliner ...              14   \n",
       "21  chronologie abschottung ausländerrecht scharf ...             723   \n",
       "22  rechte bande jagte asylbewerber elbufer erster...             306   \n",
       "23  neues überraschenden fakten kriminell sindzuwa...            1230   \n",
       "24  überrollt europa tag kommen tausende afrika vi...              45   \n",
       "25  politiker schlagen alarm hatdeutschland erreic...            1138   \n",
       "26  flucht hartz gerät unterstützt aktivieren java...             546   \n",
       "27  mörder vergewaltiger drogenhändler sagen gerät...            3257   \n",
       "28  mädchen indien vergewaltigt lynchjustiz wütend...              42   \n",
       "29  fall reem gerät unterstützt aktivieren javascr...             181   \n",
       "30  neuer gauland asylrecht muslime aussetzen neue...            1071   \n",
       "31  rom häfen flüchtlinge schließen passiert itali...             651   \n",
       "32  sterben syrer flehen öffnung jordanischer gren...            2843   \n",
       "33  merkel macht druck flüchtlingspolitik schnell ...             344   \n",
       "34  zuwanderung mittlerweile ausländer zuwanderung...             848   \n",
       "35  merkel offen br pflegekräfte afrika gerät unte...             804   \n",
       "36  vorstoß annegret cdu dienstpflichtfür flüchtli...             864   \n",
       "37  weltweit millionen kinder br flucht gerät unte...            1126   \n",
       "38  letzte entscheidungshilfe wahlprogramme br ger...            2596   \n",
       "39  attacken linker bürgermeister willintensivtäte...             653   \n",
       "40  traumatisiert aya beginnt tanzen schüsse hört ...              86   \n",
       "41  mob attackiert flüchtling offener straße gerät...             227   \n",
       "42  missbrauch zwangsehe genitalverstümmelungen ge...            1139   \n",
       "43  mühen bemühen gerät unterstützt aktivieren jav...             466   \n",
       "44  neue zahlen weniger menschen brauchen sozialhi...             845   \n",
       "45  erst umsiedelung flüchtlingen langsam gerät un...             510   \n",
       "46  perfide masche schleuser flüchtlinge ruhiggest...             728   \n",
       "47  warum gerät unterstützt aktivieren javascript ...             641   \n",
       "48  vergewaltiger abschreckung türkei abgeschoben ...            1250   \n",
       "49  lieber horst seehofer gerät unterstützt aktivi...              47   \n",
       "50  erste statistik wahrheit kriminelle asylbewerb...             387   \n",
       "51  parteitag fordern br kurskorrektur merkel gerä...            1408   \n",
       "52  eu immer mehr flüchtlinge br nutzen gefälschte...             761   \n",
       "53  lage arbeitsmarkt wirklich zwei drei flüchtlin...             688   \n",
       "\n",
       "    negative words   overall  \n",
       "0               87  positive  \n",
       "1               87  negative  \n",
       "2             1117  positive  \n",
       "3              538  positive  \n",
       "4              593  positive  \n",
       "5              103  positive  \n",
       "6              396  positive  \n",
       "7              760  positive  \n",
       "8              193  positive  \n",
       "9              228  negative  \n",
       "10              33  positive  \n",
       "11             224  positive  \n",
       "12             766  positive  \n",
       "13             497  positive  \n",
       "14              59  positive  \n",
       "15              38  positive  \n",
       "16             524  positive  \n",
       "17             458  negative  \n",
       "18             716  positive  \n",
       "19              78  negative  \n",
       "20              17  negative  \n",
       "21             706  positive  \n",
       "22             295  positive  \n",
       "23             974  positive  \n",
       "24              55  negative  \n",
       "25             894  positive  \n",
       "26             436  positive  \n",
       "27            2002  positive  \n",
       "28              57  negative  \n",
       "29             156  positive  \n",
       "30             976  positive  \n",
       "31             478  positive  \n",
       "32            1898  positive  \n",
       "33             346  negative  \n",
       "34             671  positive  \n",
       "35             595  positive  \n",
       "36             628  positive  \n",
       "37             954  positive  \n",
       "38            1680  positive  \n",
       "39             482  positive  \n",
       "40              68  positive  \n",
       "41             286  negative  \n",
       "42             922  positive  \n",
       "43             369  positive  \n",
       "44             563  positive  \n",
       "45             382  positive  \n",
       "46             623  positive  \n",
       "47             479  positive  \n",
       "48             935  positive  \n",
       "49              29  positive  \n",
       "50             304  positive  \n",
       "51            1266  positive  \n",
       "52             686  positive  \n",
       "53             456  positive  "
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label each row by comparing its positive and negative word counts.\n",
    "positive_count = bild_rel_final['positive words']\n",
    "negative_count = bild_rel_final['negative words']\n",
    "\n",
    "# One boolean mask per label, in the same order as `choices`.\n",
    "conditions = [\n",
    "    positive_count > negative_count,\n",
    "    negative_count > positive_count,\n",
    "    negative_count == positive_count,\n",
    "]\n",
    "choices = ['positive', 'negative', 'neutral']\n",
    "\n",
    "# Rows matching none of the masks fall back to the empty-string default.\n",
    "bild_rel_final['overall'] = np.select(conditions, choices, default='')\n",
    "\n",
    "bild_rel_final\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>month_year</th>\n",
       "      <th>original_title_text</th>\n",
       "      <th>text_procssed_text</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall_sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Migrationsbericht: Zahl der Zuwanderer auf höc...</td>\n",
       "      <td>migrationsbericht zahl zuwanderer höchstem sta...</td>\n",
       "      <td>239</td>\n",
       "      <td>87</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nach Massaker durch Islamisten in Paris | So s...</td>\n",
       "      <td>massaker islamisten paris schamlos nutzen afd ...</td>\n",
       "      <td>72</td>\n",
       "      <td>87</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Bei sexuellen Übergriffen und seriellen Straft...</td>\n",
       "      <td>sexuellen übergriffen seriellen straftaten sch...</td>\n",
       "      <td>1285</td>\n",
       "      <td>1117</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Dramatisches Video von sinkendem Schiff | 1000...</td>\n",
       "      <td>dramatisches video sinkendem schiff flüchtling...</td>\n",
       "      <td>642</td>\n",
       "      <td>538</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Bayerns Innenminister Joachim Herrmann (61, CS...</td>\n",
       "      <td>bayerns innenminister joachim herrmann csu jun...</td>\n",
       "      <td>659</td>\n",
       "      <td>593</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  month_year                                original_title_text  \\\n",
       "0     012014  Migrationsbericht: Zahl der Zuwanderer auf höc...   \n",
       "1     012015  Nach Massaker durch Islamisten in Paris | So s...   \n",
       "2     012016  Bei sexuellen Übergriffen und seriellen Straft...   \n",
       "3     012017  Dramatisches Video von sinkendem Schiff | 1000...   \n",
       "4     012018  Bayerns Innenminister Joachim Herrmann (61, CS...   \n",
       "\n",
       "                                  text_procssed_text  positive words  \\\n",
       "0  migrationsbericht zahl zuwanderer höchstem sta...             239   \n",
       "1  massaker islamisten paris schamlos nutzen afd ...              72   \n",
       "2  sexuellen übergriffen seriellen straftaten sch...            1285   \n",
       "3  dramatisches video sinkendem schiff flüchtling...             642   \n",
       "4  bayerns innenminister joachim herrmann csu jun...             659   \n",
       "\n",
       "   negative words overall_sentiment  \n",
       "0              87          positive  \n",
       "1              87          negative  \n",
       "2            1117          positive  \n",
       "3             538          positive  \n",
       "4             593          positive  "
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Clean data: drop the token-list column and give the remaining columns descriptive names.\n",
    "# drop(..., errors='ignore') is used instead of `del` so the cell can be re-run: after the\n",
    "# first run the column is already gone and `del` would raise a KeyError.\n",
    "# NOTE: the spelling of 'text_procssed_text' is kept unchanged because it becomes a CSV column name.\n",
    "bild_rel_final = (\n",
    "    bild_rel_final\n",
    "    .drop(columns=['nlpprocessed'], errors='ignore')\n",
    "    .rename(columns={\n",
    "        'content': 'original_title_text',\n",
    "        'liststring': 'text_procssed_text',\n",
    "        'my': 'month_year',\n",
    "        'overall': 'overall_sentiment',\n",
    "    })\n",
    ")\n",
    "# Local output path -- adjust to your machine (see the note at the top of the notebook).\n",
    "bild_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/bild_sentiment.csv\")\n",
    "bild_rel_final.head()\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## FAZ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# FAZ\n",
    "\n",
    "# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0; its replacement is\n",
    "# `on_bad_lines`. Pick whichever keyword the installed pandas accepts so the cell runs on both.\n",
    "# 'warn' mirrors the old error_bad_lines=False behaviour (skip malformed rows, emit a warning).\n",
    "pandas_major_minor = tuple(int(part) for part in pd.__version__.split('.')[:2])\n",
    "bad_line_kwargs = {'on_bad_lines': 'warn'} if pandas_major_minor >= (1, 3) else {'error_bad_lines': False}\n",
    "\n",
    "# Tab-separated file without a header row; adjust the path to your local copy.\n",
    "faz = pd.read_csv(\n",
    "    \"/Users/ashrakatelshehawy/faz_relevant-migrant-news.csv\",\n",
    "    encoding='utf-8',\n",
    "    delimiter='\\t',\n",
    "    header=None,\n",
    "    **bad_line_kwargs,\n",
    ")\n",
    "# remove unnecessary columns (drops positions 0, 1 and 5-8, leaving columns 2-4)\n",
    "faz_rel = faz.drop(faz.columns[[0,1,5,6,7,8]], axis=1)\n",
    "\n",
    "# give column names (raises if the file did not leave exactly three columns)\n",
    "faz_rel.columns = ['date', 'title',\"content\"]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>title</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8641</th>\n",
       "      <td>20170117</td>\n",
       "      <td>Flüchtlinge sollen Flüchtlingsheim angezündet ...</td>\n",
       "      <td>Flüchtlingsheim angezündet: B. und sein Brands...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>869</th>\n",
       "      <td>20170108</td>\n",
       "      <td>Kriminalität von jungen Nordafrikanern: Das Mi...</td>\n",
       "      <td>Kriminalität von jungen Nordafrikanern: Das Mi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1577</th>\n",
       "      <td>20160306</td>\n",
       "      <td>Schulz: Türkei könnte historische Chance versp...</td>\n",
       "      <td>EU-Parlamentspräsident Schulz nach der „Zaman“...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2761</th>\n",
       "      <td>20151127</td>\n",
       "      <td>Kommentar zu Flugzeugabschuss: Wer ist der Geg...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1239</th>\n",
       "      <td>20140113</td>\n",
       "      <td>Syrien-Konferenz Opposition knüpft Teilnahme a...</td>\n",
       "      <td>Bitte melden Sie sich an. 12.01.2014          ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          date                                              title  \\\n",
       "8641  20170117  Flüchtlinge sollen Flüchtlingsheim angezündet ...   \n",
       "869   20170108  Kriminalität von jungen Nordafrikanern: Das Mi...   \n",
       "1577  20160306  Schulz: Türkei könnte historische Chance versp...   \n",
       "2761  20151127  Kommentar zu Flugzeugabschuss: Wer ist der Geg...   \n",
       "1239  20140113  Syrien-Konferenz Opposition knüpft Teilnahme a...   \n",
       "\n",
       "                                                content  \n",
       "8641  Flüchtlingsheim angezündet: B. und sein Brands...  \n",
       "869   Kriminalität von jungen Nordafrikanern: Das Mi...  \n",
       "1577  EU-Parlamentspräsident Schulz nach der „Zaman“...  \n",
       "2761                                                ...  \n",
       "1239  Bitte melden Sie sich an. 12.01.2014          ...  "
      ]
     },
     "execution_count": 88,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#take random sample of 25\n",
    "faz_rel_sample=faz_rel.sample(25)\n",
    "faz_rel_sample.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Hartz-IV-Streit EU-Parlamentspräsident Schulz ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nantes: Flüchtlinge besetzen französisches Pfa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Flüchtlingskrise: Mazedonien öffnet Grenze für...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Asylbewerber: Opfer rechter Gewalt dürfen blei...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Amerika erwägt Trennung von Familien bei illeg...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>022014</td>\n",
       "      <td>Zentralafrikanische Republik Flüchtlinge warte...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>022015</td>\n",
       "      <td>Flüchtlinge aus dem Kosovo: Das kurze Glück im...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>022016</td>\n",
       "      <td>Berlin: De Maizière verteidigt schnellere Absc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>022017</td>\n",
       "      <td>48.000 Opfer von Genitalverstümmelungen in Deu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>022018</td>\n",
       "      <td>Zuwanderung und Arbeit: Migrantinnen entlasten...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>032014</td>\n",
       "      <td>Lampedusa: Mehr als 500 Flüchtlinge vor Lamped...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>032015</td>\n",
       "      <td>Zahl der Ausländer in Deutschland erreicht Rek...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>032016</td>\n",
       "      <td>Bankwesen: Jedermann-Konto auch für Flüchtling...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>032017</td>\n",
       "      <td>Bundestagswahl: AfD will „kriminelle Migranten...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>032018</td>\n",
       "      <td>Mindestens 14 Menschen vor Griechenland ertrun...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>042014</td>\n",
       "      <td>Flüchtlinge in Griechenland: „Die Hölle auf Er...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>042015</td>\n",
       "      <td>Athen will zehntausenden Flüchtlingen Asyl gew...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>042016</td>\n",
       "      <td>Video: Proteste gegen Aufnahme von Flüchtlinge...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>042017</td>\n",
       "      <td>Trotz Rettungsmissionen: Mindestens 20 Flüchtl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>042018</td>\n",
       "      <td>Hartz IV: Jeder 10. Bezieher in Deutschland st...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>052014</td>\n",
       "      <td>Europa-Wahlkampf: Kindergeld für EU-Ausländer ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>052015</td>\n",
       "      <td>Angela Merkel pocht trotz Widerstand auf Flüch...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>052016</td>\n",
       "      <td>Frankreich: Afghanische Filmstars als Flüchtli...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>052017</td>\n",
       "      <td>Bundestag beschließt Maßnahmen für effektivere...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>052018</td>\n",
       "      <td>Zehntausende müssen zurück nach Honduras Warum...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>062014</td>\n",
       "      <td>UN-Bericht: Mehr als 50 Millionen Menschen auf...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>062015</td>\n",
       "      <td>Grenze geschlossen: Syrische Flüchtlinge sitze...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>062016</td>\n",
       "      <td>Mittelmeer: Italienische Küstenwache rettet 13...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>062017</td>\n",
       "      <td>Niederlande mitverantwortlich für Srebrenica-O...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>062018</td>\n",
       "      <td>F.A.Z. Einspruch Podcast 13. Juni 2018 von Con...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>072014</td>\n",
       "      <td>Die Zahl der minderjährigen Flüchtlinge steigt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>072015</td>\n",
       "      <td>EU-Staaten erreichen nur Teillösung im Streit ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>072016</td>\n",
       "      <td>München: Minderjährige Flüchtlinge vor Radikal...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>072017</td>\n",
       "      <td>Leiter des World Food Programme zu Trumps Plän...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>072018</td>\n",
       "      <td>CSU ringt um Antwort auf Merkels Asyl-Paket Wa...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>082014</td>\n",
       "      <td>Asylbewerber in Deutschland: Überfordert mit d...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>082015</td>\n",
       "      <td>Griechenland: Flüchtlinge durchbrechen Grenze ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>082016</td>\n",
       "      <td>Integration: Junge Flüchtlinge arbeiten für di...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>082017</td>\n",
       "      <td>AfD will Flüchtlinge nach Libyen zurückschicke...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>082018</td>\n",
       "      <td>Migranten auf dem Arbeitsmarkt: Rational diskr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>092014</td>\n",
       "      <td>Flüchtlinge in Deutschland: De Maizière forder...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>092015</td>\n",
       "      <td>Übergriffe auf Flüchtlingsunterkünfte 17.07.20...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>092016</td>\n",
       "      <td>Sachsen: Flüchtlinge und Rechte prügeln sich i...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>092017</td>\n",
       "      <td>Wegen „Dreamers“-Entscheidung: Obama attackier...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>092018</td>\n",
       "      <td>F.A.Z. Einspruch Podcast 19. September 2018 vo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>102014</td>\n",
       "      <td>Syrische Flüchtlinge im Wettlauf gegen den Win...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>102015</td>\n",
       "      <td>Freiwillige helfen aus: Kostenloses WLAN für F...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>102016</td>\n",
       "      <td>Langwierige Protestaktion: Flüchtlinge marschi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>102017</td>\n",
       "      <td>Frankreichs Asylpolitik: Schwacher Staat Warum...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>102018</td>\n",
       "      <td>Hanks Welt: Wohin mit den Migranten? Warum seh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>112014</td>\n",
       "      <td>Einwanderung: De Maizière will Kriminelle früh...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>112015</td>\n",
       "      <td>Flüchtlingskrise: Gewalt gegen Flüchtlinge in ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>112016</td>\n",
       "      <td>Kinderehen: Die eigene Ordnung Kommentar: Die ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>112017</td>\n",
       "      <td>Folge von Migration: Immer mehr Kinder leben v...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>112018</td>\n",
       "      <td>Bürgschaften für Flüchtlinge, Wiesbadener soll...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>122014</td>\n",
       "      <td>Asylbewerber: Bleiberecht wird reformiert Bitt...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>122015</td>\n",
       "      <td>Neuankömmlinge in Deutschland: De Maizière: Zu...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>122016</td>\n",
       "      <td>Abgelehnte Asylbewerber: Schaffen wir Abschieb...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>122017</td>\n",
       "      <td>Politiker fordern medizinische Altersprüfung b...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>122018</td>\n",
       "      <td>F.A.Z. Einspruch Podcast 19. Dezember 2018 von...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content\n",
       "0   012014  Hartz-IV-Streit EU-Parlamentspräsident Schulz ...\n",
       "1   012015  Nantes: Flüchtlinge besetzen französisches Pfa...\n",
       "2   012016  Flüchtlingskrise: Mazedonien öffnet Grenze für...\n",
       "3   012017  Asylbewerber: Opfer rechter Gewalt dürfen blei...\n",
       "4   012018  Amerika erwägt Trennung von Familien bei illeg...\n",
       "5   022014  Zentralafrikanische Republik Flüchtlinge warte...\n",
       "6   022015  Flüchtlinge aus dem Kosovo: Das kurze Glück im...\n",
       "7   022016  Berlin: De Maizière verteidigt schnellere Absc...\n",
       "8   022017  48.000 Opfer von Genitalverstümmelungen in Deu...\n",
       "9   022018  Zuwanderung und Arbeit: Migrantinnen entlasten...\n",
       "10  032014  Lampedusa: Mehr als 500 Flüchtlinge vor Lamped...\n",
       "11  032015  Zahl der Ausländer in Deutschland erreicht Rek...\n",
       "12  032016  Bankwesen: Jedermann-Konto auch für Flüchtling...\n",
       "13  032017  Bundestagswahl: AfD will „kriminelle Migranten...\n",
       "14  032018  Mindestens 14 Menschen vor Griechenland ertrun...\n",
       "15  042014  Flüchtlinge in Griechenland: „Die Hölle auf Er...\n",
       "16  042015  Athen will zehntausenden Flüchtlingen Asyl gew...\n",
       "17  042016  Video: Proteste gegen Aufnahme von Flüchtlinge...\n",
       "18  042017  Trotz Rettungsmissionen: Mindestens 20 Flüchtl...\n",
       "19  042018  Hartz IV: Jeder 10. Bezieher in Deutschland st...\n",
       "20  052014  Europa-Wahlkampf: Kindergeld für EU-Ausländer ...\n",
       "21  052015  Angela Merkel pocht trotz Widerstand auf Flüch...\n",
       "22  052016  Frankreich: Afghanische Filmstars als Flüchtli...\n",
       "23  052017  Bundestag beschließt Maßnahmen für effektivere...\n",
       "24  052018  Zehntausende müssen zurück nach Honduras Warum...\n",
       "25  062014  UN-Bericht: Mehr als 50 Millionen Menschen auf...\n",
       "26  062015  Grenze geschlossen: Syrische Flüchtlinge sitze...\n",
       "27  062016  Mittelmeer: Italienische Küstenwache rettet 13...\n",
       "28  062017  Niederlande mitverantwortlich für Srebrenica-O...\n",
       "29  062018  F.A.Z. Einspruch Podcast 13. Juni 2018 von Con...\n",
       "30  072014  Die Zahl der minderjährigen Flüchtlinge steigt...\n",
       "31  072015  EU-Staaten erreichen nur Teillösung im Streit ...\n",
       "32  072016  München: Minderjährige Flüchtlinge vor Radikal...\n",
       "33  072017  Leiter des World Food Programme zu Trumps Plän...\n",
       "34  072018  CSU ringt um Antwort auf Merkels Asyl-Paket Wa...\n",
       "35  082014  Asylbewerber in Deutschland: Überfordert mit d...\n",
       "36  082015  Griechenland: Flüchtlinge durchbrechen Grenze ...\n",
       "37  082016  Integration: Junge Flüchtlinge arbeiten für di...\n",
       "38  082017  AfD will Flüchtlinge nach Libyen zurückschicke...\n",
       "39  082018  Migranten auf dem Arbeitsmarkt: Rational diskr...\n",
       "40  092014  Flüchtlinge in Deutschland: De Maizière forder...\n",
       "41  092015  Übergriffe auf Flüchtlingsunterkünfte 17.07.20...\n",
       "42  092016  Sachsen: Flüchtlinge und Rechte prügeln sich i...\n",
       "43  092017  Wegen „Dreamers“-Entscheidung: Obama attackier...\n",
       "44  092018  F.A.Z. Einspruch Podcast 19. September 2018 vo...\n",
       "45  102014  Syrische Flüchtlinge im Wettlauf gegen den Win...\n",
       "46  102015  Freiwillige helfen aus: Kostenloses WLAN für F...\n",
       "47  102016  Langwierige Protestaktion: Flüchtlinge marschi...\n",
       "48  102017  Frankreichs Asylpolitik: Schwacher Staat Warum...\n",
       "49  102018  Hanks Welt: Wohin mit den Migranten? Warum seh...\n",
       "50  112014  Einwanderung: De Maizière will Kriminelle früh...\n",
       "51  112015  Flüchtlingskrise: Gewalt gegen Flüchtlinge in ...\n",
       "52  112016  Kinderehen: Die eigene Ordnung Kommentar: Die ...\n",
       "53  112017  Folge von Migration: Immer mehr Kinder leben v...\n",
       "54  112018  Bürgschaften für Flüchtlinge, Wiesbadener soll...\n",
       "55  122014  Asylbewerber: Bleiberecht wird reformiert Bitt...\n",
       "56  122015  Neuankömmlinge in Deutschland: De Maizière: Zu...\n",
       "57  122016  Abgelehnte Asylbewerber: Schaffen wir Abschieb...\n",
       "58  122017  Politiker fordern medizinische Altersprüfung b...\n",
       "59  122018  F.A.Z. Einspruch Podcast 19. Dezember 2018 von..."
      ]
     },
     "execution_count": 89,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "# merge title and content in one variable - becaue we will use the whole text to produce the embeddings\n",
    "faz_rel[\"content\"]=faz_rel[\"title\"]+[\" \"]+faz_rel[\"content\"]\n",
    "\n",
    "#remove the title column\n",
    "faz_rel.drop(faz_rel.columns[1], axis=1, inplace=True)\n",
    "\n",
    "# lets remove the day because we dont need it\n",
    "faz_rel[\"date\"] = faz_rel[\"date\"].astype(str).str[:-2].astype(np.int64)\n",
    "\n",
    "#extract last two digits to change months structure\n",
    "\n",
    "#first change to charachter\n",
    "faz_rel['date1'] = faz_rel['date'].apply(str)\n",
    "\n",
    "#new column with months and years\n",
    "faz_rel['month'] = faz_rel['date1'].str[4:6]\n",
    "faz_rel['year'] = faz_rel['date1'].str[0:4]\n",
    "\n",
    "#same date structure as the other datasets\n",
    "faz_rel[\"my\"]=faz_rel[\"month\"]+faz_rel[\"year\"]\n",
    "\n",
    "#change month interger to month name\n",
    "\n",
    "#revert back to interger\n",
    "faz_rel['month'] = faz_rel['month'].apply(int)\n",
    "\n",
    "faz_rel2=faz_rel\n",
    "\n",
    "#use calendar to change month number to name\n",
    "faz_rel2['month'] = faz_rel2['month'].apply(lambda x: calendar.month_name[x])\n",
    "\n",
    "\n",
    "#have a consistent date variable for all datasets\n",
    "faz_rel2[\"month-year\"] = faz_rel2[\"month\"] +[\" \"]+ faz_rel2[\"year\"] \n",
    "\n",
    "\n",
    "#aggregate over months\n",
    "faz_rel2=faz_rel2.groupby(['my'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "faz_rel2\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "#nlp\n",
    "faz_rel2[\"nlpprocessed\"]=faz_rel2['content'].apply(nlp_pipeline)\n",
    "\n",
    "\n",
    "#convert nlpprocessed column to string\n",
    "faz_rel2['liststring'] = [','.join(map(str, l)) for l in faz_rel2['nlpprocessed']]\n",
    "faz_rel2['liststring'] = (faz_rel2['liststring'].replace(',',' ', regex=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "faz_rel_final=faz_rel2\n",
    "faz_rel_final['positive words'] = faz_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "faz_rel_final['negative words'] = faz_rel_final['liststring'].str.count('|'.join(negative_list1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Hartz-IV-Streit EU-Parlamentspräsident Schulz ...</td>\n",
       "      <td>[schulz, warnt, brüssel, bitte, melden, katego...</td>\n",
       "      <td>schulz warnt brüssel bitte melden kategorische...</td>\n",
       "      <td>12562</td>\n",
       "      <td>9510</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nantes: Flüchtlinge besetzen französisches Pfa...</td>\n",
       "      <td>[nantes, flüchtlinge, besetzen, französisches,...</td>\n",
       "      <td>nantes flüchtlinge besetzen französisches pfar...</td>\n",
       "      <td>9263</td>\n",
       "      <td>8069</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Flüchtlingskrise: Mazedonien öffnet Grenze für...</td>\n",
       "      <td>[flüchtlingskrise, mazedonien, öffnet, grenze,...</td>\n",
       "      <td>flüchtlingskrise mazedonien öffnet grenze ausg...</td>\n",
       "      <td>35265</td>\n",
       "      <td>29230</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Asylbewerber: Opfer rechter Gewalt dürfen blei...</td>\n",
       "      <td>[asylbewerber, opfer, rechter, gewalt, dürfen,...</td>\n",
       "      <td>asylbewerber opfer rechter gewalt dürfen bleib...</td>\n",
       "      <td>23132</td>\n",
       "      <td>17372</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Amerika erwägt Trennung von Familien bei illeg...</td>\n",
       "      <td>[amerika, erwägt, trennung, familien, illegale...</td>\n",
       "      <td>amerika erwägt trennung familien illegaler ein...</td>\n",
       "      <td>11322</td>\n",
       "      <td>9099</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>022014</td>\n",
       "      <td>Zentralafrikanische Republik Flüchtlinge warte...</td>\n",
       "      <td>[zentralafrikanische, republik, flüchtlinge, w...</td>\n",
       "      <td>zentralafrikanische republik flüchtlinge warte...</td>\n",
       "      <td>11880</td>\n",
       "      <td>8128</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>022015</td>\n",
       "      <td>Flüchtlinge aus dem Kosovo: Das kurze Glück im...</td>\n",
       "      <td>[flüchtlinge, kosovo, kurze, glück, regenbogen...</td>\n",
       "      <td>flüchtlinge kosovo kurze glück regenbogenland ...</td>\n",
       "      <td>9210</td>\n",
       "      <td>7377</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>022016</td>\n",
       "      <td>Berlin: De Maizière verteidigt schnellere Absc...</td>\n",
       "      <td>[berlin, de, maizière, verteidigt, schnellere,...</td>\n",
       "      <td>berlin de maizière verteidigt schnellere absch...</td>\n",
       "      <td>38082</td>\n",
       "      <td>31666</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>022017</td>\n",
       "      <td>48.000 Opfer von Genitalverstümmelungen in Deu...</td>\n",
       "      <td>[opfer, genitalverstümmelungen, deutschland, d...</td>\n",
       "      <td>opfer genitalverstümmelungen deutschland deuts...</td>\n",
       "      <td>10684</td>\n",
       "      <td>8076</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>022018</td>\n",
       "      <td>Zuwanderung und Arbeit: Migrantinnen entlasten...</td>\n",
       "      <td>[zuwanderung, arbeit, migrantinnen, entlasten,...</td>\n",
       "      <td>zuwanderung arbeit migrantinnen entlasten fami...</td>\n",
       "      <td>3775</td>\n",
       "      <td>3291</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>032014</td>\n",
       "      <td>Lampedusa: Mehr als 500 Flüchtlinge vor Lamped...</td>\n",
       "      <td>[lampedusa, mehr, flüchtlinge, lampedusa, gere...</td>\n",
       "      <td>lampedusa mehr flüchtlinge lampedusa gerettet ...</td>\n",
       "      <td>4679</td>\n",
       "      <td>3195</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>032015</td>\n",
       "      <td>Zahl der Ausländer in Deutschland erreicht Rek...</td>\n",
       "      <td>[zahl, ausländer, deutschland, erreicht, rekor...</td>\n",
       "      <td>zahl ausländer deutschland erreicht rekordhoch...</td>\n",
       "      <td>9446</td>\n",
       "      <td>7768</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>032016</td>\n",
       "      <td>Bankwesen: Jedermann-Konto auch für Flüchtling...</td>\n",
       "      <td>[bankwesen, flüchtlinge, to, view, this, video...</td>\n",
       "      <td>bankwesen flüchtlinge to view this video pleas...</td>\n",
       "      <td>27899</td>\n",
       "      <td>22611</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>032017</td>\n",
       "      <td>Bundestagswahl: AfD will „kriminelle Migranten...</td>\n",
       "      <td>[bundestagswahl, afd, kriminelle, migranten, a...</td>\n",
       "      <td>bundestagswahl afd kriminelle migranten ausbür...</td>\n",
       "      <td>7658</td>\n",
       "      <td>6311</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>032018</td>\n",
       "      <td>Mindestens 14 Menschen vor Griechenland ertrun...</td>\n",
       "      <td>[mindestens, menschen, griechenland, ertrunken...</td>\n",
       "      <td>mindestens menschen griechenland ertrunken war...</td>\n",
       "      <td>5912</td>\n",
       "      <td>5111</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>042014</td>\n",
       "      <td>Flüchtlinge in Griechenland: „Die Hölle auf Er...</td>\n",
       "      <td>[flüchtlinge, griechenland, hölle, erden, bitt...</td>\n",
       "      <td>flüchtlinge griechenland hölle erden bitte mel...</td>\n",
       "      <td>6116</td>\n",
       "      <td>4512</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>042015</td>\n",
       "      <td>Athen will zehntausenden Flüchtlingen Asyl gew...</td>\n",
       "      <td>[athen, zehntausenden, flüchtlingen, asyl, gew...</td>\n",
       "      <td>athen zehntausenden flüchtlingen asyl gewähren...</td>\n",
       "      <td>11103</td>\n",
       "      <td>9906</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>042016</td>\n",
       "      <td>Video: Proteste gegen Aufnahme von Flüchtlinge...</td>\n",
       "      <td>[video, proteste, aufnahme, flüchtlingen, türk...</td>\n",
       "      <td>video proteste aufnahme flüchtlingen türkei to...</td>\n",
       "      <td>19731</td>\n",
       "      <td>14439</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>042017</td>\n",
       "      <td>Trotz Rettungsmissionen: Mindestens 20 Flüchtl...</td>\n",
       "      <td>[trotz, rettungsmissionen, mindestens, flüchtl...</td>\n",
       "      <td>trotz rettungsmissionen mindestens flüchtlinge...</td>\n",
       "      <td>12129</td>\n",
       "      <td>9453</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>042018</td>\n",
       "      <td>Hartz IV: Jeder 10. Bezieher in Deutschland st...</td>\n",
       "      <td>[hartz, iv, bezieher, deutschland, stammt, syr...</td>\n",
       "      <td>hartz iv bezieher deutschland stammt syrien wa...</td>\n",
       "      <td>6421</td>\n",
       "      <td>5296</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>052014</td>\n",
       "      <td>Europa-Wahlkampf: Kindergeld für EU-Ausländer ...</td>\n",
       "      <td>[kindergeld, bitte, melden, cdu, csu, schärfer...</td>\n",
       "      <td>kindergeld bitte melden cdu csu schärfer kontr...</td>\n",
       "      <td>3727</td>\n",
       "      <td>2629</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>052015</td>\n",
       "      <td>Angela Merkel pocht trotz Widerstand auf Flüch...</td>\n",
       "      <td>[angela, merkel, pocht, trotz, widerstand, flü...</td>\n",
       "      <td>angela merkel pocht trotz widerstand flüchtlin...</td>\n",
       "      <td>5557</td>\n",
       "      <td>4783</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>052016</td>\n",
       "      <td>Frankreich: Afghanische Filmstars als Flüchtli...</td>\n",
       "      <td>[frankreich, afghanische, filmstars, flüchtlin...</td>\n",
       "      <td>frankreich afghanische filmstars flüchtlinge t...</td>\n",
       "      <td>18054</td>\n",
       "      <td>14362</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>052017</td>\n",
       "      <td>Bundestag beschließt Maßnahmen für effektivere...</td>\n",
       "      <td>[bundestag, beschließt, maßnahmen, effektivere...</td>\n",
       "      <td>bundestag beschließt maßnahmen effektivere abs...</td>\n",
       "      <td>7618</td>\n",
       "      <td>5631</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>052018</td>\n",
       "      <td>Zehntausende müssen zurück nach Honduras Warum...</td>\n",
       "      <td>[zehntausende, müssen, zurück, honduras, warum...</td>\n",
       "      <td>zehntausende müssen zurück honduras warum sehe...</td>\n",
       "      <td>6065</td>\n",
       "      <td>5324</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>062014</td>\n",
       "      <td>UN-Bericht: Mehr als 50 Millionen Menschen auf...</td>\n",
       "      <td>[mehr, millionen, menschen, flucht, bitte, mel...</td>\n",
       "      <td>mehr millionen menschen flucht bitte melden fl...</td>\n",
       "      <td>3981</td>\n",
       "      <td>2604</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>062015</td>\n",
       "      <td>Grenze geschlossen: Syrische Flüchtlinge sitze...</td>\n",
       "      <td>[grenze, geschlossen, syrische, flüchtlinge, s...</td>\n",
       "      <td>grenze geschlossen syrische flüchtlinge sitzen...</td>\n",
       "      <td>13139</td>\n",
       "      <td>10250</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>062016</td>\n",
       "      <td>Mittelmeer: Italienische Küstenwache rettet 13...</td>\n",
       "      <td>[mittelmeer, italienische, küstenwache, rettet...</td>\n",
       "      <td>mittelmeer italienische küstenwache rettet flü...</td>\n",
       "      <td>14593</td>\n",
       "      <td>11150</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>062017</td>\n",
       "      <td>Niederlande mitverantwortlich für Srebrenica-O...</td>\n",
       "      <td>[niederlande, mitverantwortlich, niederlande, ...</td>\n",
       "      <td>niederlande mitverantwortlich niederlande trag...</td>\n",
       "      <td>13439</td>\n",
       "      <td>10018</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>062018</td>\n",
       "      <td>F.A.Z. Einspruch Podcast 13. Juni 2018 von Con...</td>\n",
       "      <td>[einspruch, podcast, juni, constantin, van, li...</td>\n",
       "      <td>einspruch podcast juni constantin van lijnden ...</td>\n",
       "      <td>8770</td>\n",
       "      <td>10486</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>072014</td>\n",
       "      <td>Die Zahl der minderjährigen Flüchtlinge steigt...</td>\n",
       "      <td>[zahl, minderjährigen, flüchtlinge, steigt, bi...</td>\n",
       "      <td>zahl minderjährigen flüchtlinge steigt bitte m...</td>\n",
       "      <td>5995</td>\n",
       "      <td>4484</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>072015</td>\n",
       "      <td>EU-Staaten erreichen nur Teillösung im Streit ...</td>\n",
       "      <td>[erreichen, teillösung, streit, flüchtlingsver...</td>\n",
       "      <td>erreichen teillösung streit flüchtlingsverteil...</td>\n",
       "      <td>16917</td>\n",
       "      <td>12869</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>072016</td>\n",
       "      <td>München: Minderjährige Flüchtlinge vor Radikal...</td>\n",
       "      <td>[münchen, minderjährige, flüchtlinge, radikali...</td>\n",
       "      <td>münchen minderjährige flüchtlinge radikalisier...</td>\n",
       "      <td>19517</td>\n",
       "      <td>16354</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>072017</td>\n",
       "      <td>Leiter des World Food Programme zu Trumps Plän...</td>\n",
       "      <td>[leiter, world, food, programme, trumps, pläne...</td>\n",
       "      <td>leiter world food programme trumps plänen gesp...</td>\n",
       "      <td>13926</td>\n",
       "      <td>10246</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>072018</td>\n",
       "      <td>CSU ringt um Antwort auf Merkels Asyl-Paket Wa...</td>\n",
       "      <td>[csu, ringt, antwort, merkels, warum, sehe, th...</td>\n",
       "      <td>csu ringt antwort merkels warum sehe this argu...</td>\n",
       "      <td>4896</td>\n",
       "      <td>5459</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>082014</td>\n",
       "      <td>Asylbewerber in Deutschland: Überfordert mit d...</td>\n",
       "      <td>[asylbewerber, deutschland, überfordert, hilfe...</td>\n",
       "      <td>asylbewerber deutschland überfordert hilfe bit...</td>\n",
       "      <td>7778</td>\n",
       "      <td>6200</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>082015</td>\n",
       "      <td>Griechenland: Flüchtlinge durchbrechen Grenze ...</td>\n",
       "      <td>[griechenland, flüchtlinge, durchbrechen, gren...</td>\n",
       "      <td>griechenland flüchtlinge durchbrechen grenze m...</td>\n",
       "      <td>23101</td>\n",
       "      <td>19319</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>082016</td>\n",
       "      <td>Integration: Junge Flüchtlinge arbeiten für di...</td>\n",
       "      <td>[integration, junge, flüchtlinge, arbeiten, de...</td>\n",
       "      <td>integration junge flüchtlinge arbeiten deutsch...</td>\n",
       "      <td>18623</td>\n",
       "      <td>14595</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>082017</td>\n",
       "      <td>AfD will Flüchtlinge nach Libyen zurückschicke...</td>\n",
       "      <td>[afd, flüchtlinge, libyen, zurückschicken, spi...</td>\n",
       "      <td>afd flüchtlinge libyen zurückschicken spitzenk...</td>\n",
       "      <td>14278</td>\n",
       "      <td>12375</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>082018</td>\n",
       "      <td>Migranten auf dem Arbeitsmarkt: Rational diskr...</td>\n",
       "      <td>[migranten, arbeitsmarkt, rational, diskrimini...</td>\n",
       "      <td>migranten arbeitsmarkt rational diskriminieren...</td>\n",
       "      <td>4577</td>\n",
       "      <td>5302</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>092014</td>\n",
       "      <td>Flüchtlinge in Deutschland: De Maizière forder...</td>\n",
       "      <td>[flüchtlinge, deutschland, de, maizière, forde...</td>\n",
       "      <td>flüchtlinge deutschland de maizière fordert ge...</td>\n",
       "      <td>8827</td>\n",
       "      <td>7476</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>092015</td>\n",
       "      <td>Übergriffe auf Flüchtlingsunterkünfte 17.07.20...</td>\n",
       "      <td>[übergriffe, flüchtlingsunterkünfte, uhr, weit...</td>\n",
       "      <td>übergriffe flüchtlingsunterkünfte uhr weitersa...</td>\n",
       "      <td>37432</td>\n",
       "      <td>31393</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>092016</td>\n",
       "      <td>Sachsen: Flüchtlinge und Rechte prügeln sich i...</td>\n",
       "      <td>[sachsen, flüchtlinge, rechte, prügeln, bautze...</td>\n",
       "      <td>sachsen flüchtlinge rechte prügeln bautzen to ...</td>\n",
       "      <td>21313</td>\n",
       "      <td>17029</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>092017</td>\n",
       "      <td>Wegen „Dreamers“-Entscheidung: Obama attackier...</td>\n",
       "      <td>[wegen, dreamers, obama, attackiert, trump, re...</td>\n",
       "      <td>wegen dreamers obama attackiert trump ressorts...</td>\n",
       "      <td>10425</td>\n",
       "      <td>7538</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>092018</td>\n",
       "      <td>F.A.Z. Einspruch Podcast 19. September 2018 vo...</td>\n",
       "      <td>[einspruch, podcast, september, corinna, budra...</td>\n",
       "      <td>einspruch podcast september corinna budras con...</td>\n",
       "      <td>1897</td>\n",
       "      <td>2267</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>102014</td>\n",
       "      <td>Syrische Flüchtlinge im Wettlauf gegen den Win...</td>\n",
       "      <td>[syrische, flüchtlinge, wettlauf, winter, bitt...</td>\n",
       "      <td>syrische flüchtlinge wettlauf winter bitte mel...</td>\n",
       "      <td>12252</td>\n",
       "      <td>10088</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>102015</td>\n",
       "      <td>Freiwillige helfen aus: Kostenloses WLAN für F...</td>\n",
       "      <td>[freiwillige, helfen, kostenloses, wlan, flüch...</td>\n",
       "      <td>freiwillige helfen kostenloses wlan flüchtling...</td>\n",
       "      <td>33499</td>\n",
       "      <td>29727</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>102016</td>\n",
       "      <td>Langwierige Protestaktion: Flüchtlinge marschi...</td>\n",
       "      <td>[langwierige, protestaktion, flüchtlinge, mars...</td>\n",
       "      <td>langwierige protestaktion flüchtlinge marschie...</td>\n",
       "      <td>14379</td>\n",
       "      <td>11627</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>102017</td>\n",
       "      <td>Frankreichs Asylpolitik: Schwacher Staat Warum...</td>\n",
       "      <td>[frankreichs, asylpolitik, schwacher, staat, w...</td>\n",
       "      <td>frankreichs asylpolitik schwacher staat warum ...</td>\n",
       "      <td>9992</td>\n",
       "      <td>6920</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>102018</td>\n",
       "      <td>Hanks Welt: Wohin mit den Migranten? Warum seh...</td>\n",
       "      <td>[hanks, welt, wohin, migranten, warum, sehe, t...</td>\n",
       "      <td>hanks welt wohin migranten warum sehe this arg...</td>\n",
       "      <td>1649</td>\n",
       "      <td>1836</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>112014</td>\n",
       "      <td>Einwanderung: De Maizière will Kriminelle früh...</td>\n",
       "      <td>[einwanderung, de, maizière, kriminelle, frühe...</td>\n",
       "      <td>einwanderung de maizière kriminelle früher abs...</td>\n",
       "      <td>6747</td>\n",
       "      <td>5199</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>112015</td>\n",
       "      <td>Flüchtlingskrise: Gewalt gegen Flüchtlinge in ...</td>\n",
       "      <td>[flüchtlingskrise, gewalt, flüchtlinge, schwed...</td>\n",
       "      <td>flüchtlingskrise gewalt flüchtlinge schweden n...</td>\n",
       "      <td>32241</td>\n",
       "      <td>28268</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>112016</td>\n",
       "      <td>Kinderehen: Die eigene Ordnung Kommentar: Die ...</td>\n",
       "      <td>[kinderehen, eigene, ordnung, kommentar, eigen...</td>\n",
       "      <td>kinderehen eigene ordnung kommentar eigene ord...</td>\n",
       "      <td>9721</td>\n",
       "      <td>7116</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>112017</td>\n",
       "      <td>Folge von Migration: Immer mehr Kinder leben v...</td>\n",
       "      <td>[folge, migration, immer, mehr, kinder, leben,...</td>\n",
       "      <td>folge migration immer mehr kinder leben hartz ...</td>\n",
       "      <td>3838</td>\n",
       "      <td>3647</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>112018</td>\n",
       "      <td>Bürgschaften für Flüchtlinge, Wiesbadener soll...</td>\n",
       "      <td>[bürgschaften, flüchtlinge, wiesbadener, solle...</td>\n",
       "      <td>bürgschaften flüchtlinge wiesbadener sollen za...</td>\n",
       "      <td>5028</td>\n",
       "      <td>5654</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>122014</td>\n",
       "      <td>Asylbewerber: Bleiberecht wird reformiert Bitt...</td>\n",
       "      <td>[asylbewerber, bleiberecht, reformiert, bitte,...</td>\n",
       "      <td>asylbewerber bleiberecht reformiert bitte meld...</td>\n",
       "      <td>11169</td>\n",
       "      <td>8988</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>122015</td>\n",
       "      <td>Neuankömmlinge in Deutschland: De Maizière: Zu...</td>\n",
       "      <td>[neuankömmlinge, deutschland, de, maizière, zu...</td>\n",
       "      <td>neuankömmlinge deutschland de maizière zuzug f...</td>\n",
       "      <td>24324</td>\n",
       "      <td>19431</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>122016</td>\n",
       "      <td>Abgelehnte Asylbewerber: Schaffen wir Abschieb...</td>\n",
       "      <td>[abgelehnte, asylbewerber, schaffen, abschiebu...</td>\n",
       "      <td>abgelehnte asylbewerber schaffen abschiebung a...</td>\n",
       "      <td>23041</td>\n",
       "      <td>17779</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>122017</td>\n",
       "      <td>Politiker fordern medizinische Altersprüfung b...</td>\n",
       "      <td>[politiker, fordern, medizinische, altersprüfu...</td>\n",
       "      <td>politiker fordern medizinische altersprüfung f...</td>\n",
       "      <td>8788</td>\n",
       "      <td>7164</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>122018</td>\n",
       "      <td>F.A.Z. Einspruch Podcast 19. Dezember 2018 von...</td>\n",
       "      <td>[einspruch, podcast, dezember, constantin, van...</td>\n",
       "      <td>einspruch podcast dezember constantin van lijn...</td>\n",
       "      <td>2818</td>\n",
       "      <td>3453</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content  \\\n",
       "0   012014  Hartz-IV-Streit EU-Parlamentspräsident Schulz ...   \n",
       "1   012015  Nantes: Flüchtlinge besetzen französisches Pfa...   \n",
       "2   012016  Flüchtlingskrise: Mazedonien öffnet Grenze für...   \n",
       "3   012017  Asylbewerber: Opfer rechter Gewalt dürfen blei...   \n",
       "4   012018  Amerika erwägt Trennung von Familien bei illeg...   \n",
       "5   022014  Zentralafrikanische Republik Flüchtlinge warte...   \n",
       "6   022015  Flüchtlinge aus dem Kosovo: Das kurze Glück im...   \n",
       "7   022016  Berlin: De Maizière verteidigt schnellere Absc...   \n",
       "8   022017  48.000 Opfer von Genitalverstümmelungen in Deu...   \n",
       "9   022018  Zuwanderung und Arbeit: Migrantinnen entlasten...   \n",
       "10  032014  Lampedusa: Mehr als 500 Flüchtlinge vor Lamped...   \n",
       "11  032015  Zahl der Ausländer in Deutschland erreicht Rek...   \n",
       "12  032016  Bankwesen: Jedermann-Konto auch für Flüchtling...   \n",
       "13  032017  Bundestagswahl: AfD will „kriminelle Migranten...   \n",
       "14  032018  Mindestens 14 Menschen vor Griechenland ertrun...   \n",
       "15  042014  Flüchtlinge in Griechenland: „Die Hölle auf Er...   \n",
       "16  042015  Athen will zehntausenden Flüchtlingen Asyl gew...   \n",
       "17  042016  Video: Proteste gegen Aufnahme von Flüchtlinge...   \n",
       "18  042017  Trotz Rettungsmissionen: Mindestens 20 Flüchtl...   \n",
       "19  042018  Hartz IV: Jeder 10. Bezieher in Deutschland st...   \n",
       "20  052014  Europa-Wahlkampf: Kindergeld für EU-Ausländer ...   \n",
       "21  052015  Angela Merkel pocht trotz Widerstand auf Flüch...   \n",
       "22  052016  Frankreich: Afghanische Filmstars als Flüchtli...   \n",
       "23  052017  Bundestag beschließt Maßnahmen für effektivere...   \n",
       "24  052018  Zehntausende müssen zurück nach Honduras Warum...   \n",
       "25  062014  UN-Bericht: Mehr als 50 Millionen Menschen auf...   \n",
       "26  062015  Grenze geschlossen: Syrische Flüchtlinge sitze...   \n",
       "27  062016  Mittelmeer: Italienische Küstenwache rettet 13...   \n",
       "28  062017  Niederlande mitverantwortlich für Srebrenica-O...   \n",
       "29  062018  F.A.Z. Einspruch Podcast 13. Juni 2018 von Con...   \n",
       "30  072014  Die Zahl der minderjährigen Flüchtlinge steigt...   \n",
       "31  072015  EU-Staaten erreichen nur Teillösung im Streit ...   \n",
       "32  072016  München: Minderjährige Flüchtlinge vor Radikal...   \n",
       "33  072017  Leiter des World Food Programme zu Trumps Plän...   \n",
       "34  072018  CSU ringt um Antwort auf Merkels Asyl-Paket Wa...   \n",
       "35  082014  Asylbewerber in Deutschland: Überfordert mit d...   \n",
       "36  082015  Griechenland: Flüchtlinge durchbrechen Grenze ...   \n",
       "37  082016  Integration: Junge Flüchtlinge arbeiten für di...   \n",
       "38  082017  AfD will Flüchtlinge nach Libyen zurückschicke...   \n",
       "39  082018  Migranten auf dem Arbeitsmarkt: Rational diskr...   \n",
       "40  092014  Flüchtlinge in Deutschland: De Maizière forder...   \n",
       "41  092015  Übergriffe auf Flüchtlingsunterkünfte 17.07.20...   \n",
       "42  092016  Sachsen: Flüchtlinge und Rechte prügeln sich i...   \n",
       "43  092017  Wegen „Dreamers“-Entscheidung: Obama attackier...   \n",
       "44  092018  F.A.Z. Einspruch Podcast 19. September 2018 vo...   \n",
       "45  102014  Syrische Flüchtlinge im Wettlauf gegen den Win...   \n",
       "46  102015  Freiwillige helfen aus: Kostenloses WLAN für F...   \n",
       "47  102016  Langwierige Protestaktion: Flüchtlinge marschi...   \n",
       "48  102017  Frankreichs Asylpolitik: Schwacher Staat Warum...   \n",
       "49  102018  Hanks Welt: Wohin mit den Migranten? Warum seh...   \n",
       "50  112014  Einwanderung: De Maizière will Kriminelle früh...   \n",
       "51  112015  Flüchtlingskrise: Gewalt gegen Flüchtlinge in ...   \n",
       "52  112016  Kinderehen: Die eigene Ordnung Kommentar: Die ...   \n",
       "53  112017  Folge von Migration: Immer mehr Kinder leben v...   \n",
       "54  112018  Bürgschaften für Flüchtlinge, Wiesbadener soll...   \n",
       "55  122014  Asylbewerber: Bleiberecht wird reformiert Bitt...   \n",
       "56  122015  Neuankömmlinge in Deutschland: De Maizière: Zu...   \n",
       "57  122016  Abgelehnte Asylbewerber: Schaffen wir Abschieb...   \n",
       "58  122017  Politiker fordern medizinische Altersprüfung b...   \n",
       "59  122018  F.A.Z. Einspruch Podcast 19. Dezember 2018 von...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [schulz, warnt, brüssel, bitte, melden, katego...   \n",
       "1   [nantes, flüchtlinge, besetzen, französisches,...   \n",
       "2   [flüchtlingskrise, mazedonien, öffnet, grenze,...   \n",
       "3   [asylbewerber, opfer, rechter, gewalt, dürfen,...   \n",
       "4   [amerika, erwägt, trennung, familien, illegale...   \n",
       "5   [zentralafrikanische, republik, flüchtlinge, w...   \n",
       "6   [flüchtlinge, kosovo, kurze, glück, regenbogen...   \n",
       "7   [berlin, de, maizière, verteidigt, schnellere,...   \n",
       "8   [opfer, genitalverstümmelungen, deutschland, d...   \n",
       "9   [zuwanderung, arbeit, migrantinnen, entlasten,...   \n",
       "10  [lampedusa, mehr, flüchtlinge, lampedusa, gere...   \n",
       "11  [zahl, ausländer, deutschland, erreicht, rekor...   \n",
       "12  [bankwesen, flüchtlinge, to, view, this, video...   \n",
       "13  [bundestagswahl, afd, kriminelle, migranten, a...   \n",
       "14  [mindestens, menschen, griechenland, ertrunken...   \n",
       "15  [flüchtlinge, griechenland, hölle, erden, bitt...   \n",
       "16  [athen, zehntausenden, flüchtlingen, asyl, gew...   \n",
       "17  [video, proteste, aufnahme, flüchtlingen, türk...   \n",
       "18  [trotz, rettungsmissionen, mindestens, flüchtl...   \n",
       "19  [hartz, iv, bezieher, deutschland, stammt, syr...   \n",
       "20  [kindergeld, bitte, melden, cdu, csu, schärfer...   \n",
       "21  [angela, merkel, pocht, trotz, widerstand, flü...   \n",
       "22  [frankreich, afghanische, filmstars, flüchtlin...   \n",
       "23  [bundestag, beschließt, maßnahmen, effektivere...   \n",
       "24  [zehntausende, müssen, zurück, honduras, warum...   \n",
       "25  [mehr, millionen, menschen, flucht, bitte, mel...   \n",
       "26  [grenze, geschlossen, syrische, flüchtlinge, s...   \n",
       "27  [mittelmeer, italienische, küstenwache, rettet...   \n",
       "28  [niederlande, mitverantwortlich, niederlande, ...   \n",
       "29  [einspruch, podcast, juni, constantin, van, li...   \n",
       "30  [zahl, minderjährigen, flüchtlinge, steigt, bi...   \n",
       "31  [erreichen, teillösung, streit, flüchtlingsver...   \n",
       "32  [münchen, minderjährige, flüchtlinge, radikali...   \n",
       "33  [leiter, world, food, programme, trumps, pläne...   \n",
       "34  [csu, ringt, antwort, merkels, warum, sehe, th...   \n",
       "35  [asylbewerber, deutschland, überfordert, hilfe...   \n",
       "36  [griechenland, flüchtlinge, durchbrechen, gren...   \n",
       "37  [integration, junge, flüchtlinge, arbeiten, de...   \n",
       "38  [afd, flüchtlinge, libyen, zurückschicken, spi...   \n",
       "39  [migranten, arbeitsmarkt, rational, diskrimini...   \n",
       "40  [flüchtlinge, deutschland, de, maizière, forde...   \n",
       "41  [übergriffe, flüchtlingsunterkünfte, uhr, weit...   \n",
       "42  [sachsen, flüchtlinge, rechte, prügeln, bautze...   \n",
       "43  [wegen, dreamers, obama, attackiert, trump, re...   \n",
       "44  [einspruch, podcast, september, corinna, budra...   \n",
       "45  [syrische, flüchtlinge, wettlauf, winter, bitt...   \n",
       "46  [freiwillige, helfen, kostenloses, wlan, flüch...   \n",
       "47  [langwierige, protestaktion, flüchtlinge, mars...   \n",
       "48  [frankreichs, asylpolitik, schwacher, staat, w...   \n",
       "49  [hanks, welt, wohin, migranten, warum, sehe, t...   \n",
       "50  [einwanderung, de, maizière, kriminelle, frühe...   \n",
       "51  [flüchtlingskrise, gewalt, flüchtlinge, schwed...   \n",
       "52  [kinderehen, eigene, ordnung, kommentar, eigen...   \n",
       "53  [folge, migration, immer, mehr, kinder, leben,...   \n",
       "54  [bürgschaften, flüchtlinge, wiesbadener, solle...   \n",
       "55  [asylbewerber, bleiberecht, reformiert, bitte,...   \n",
       "56  [neuankömmlinge, deutschland, de, maizière, zu...   \n",
       "57  [abgelehnte, asylbewerber, schaffen, abschiebu...   \n",
       "58  [politiker, fordern, medizinische, altersprüfu...   \n",
       "59  [einspruch, podcast, dezember, constantin, van...   \n",
       "\n",
       "                                           liststring  positive words  \\\n",
       "0   schulz warnt brüssel bitte melden kategorische...           12562   \n",
       "1   nantes flüchtlinge besetzen französisches pfar...            9263   \n",
       "2   flüchtlingskrise mazedonien öffnet grenze ausg...           35265   \n",
       "3   asylbewerber opfer rechter gewalt dürfen bleib...           23132   \n",
       "4   amerika erwägt trennung familien illegaler ein...           11322   \n",
       "5   zentralafrikanische republik flüchtlinge warte...           11880   \n",
       "6   flüchtlinge kosovo kurze glück regenbogenland ...            9210   \n",
       "7   berlin de maizière verteidigt schnellere absch...           38082   \n",
       "8   opfer genitalverstümmelungen deutschland deuts...           10684   \n",
       "9   zuwanderung arbeit migrantinnen entlasten fami...            3775   \n",
       "10  lampedusa mehr flüchtlinge lampedusa gerettet ...            4679   \n",
       "11  zahl ausländer deutschland erreicht rekordhoch...            9446   \n",
       "12  bankwesen flüchtlinge to view this video pleas...           27899   \n",
       "13  bundestagswahl afd kriminelle migranten ausbür...            7658   \n",
       "14  mindestens menschen griechenland ertrunken war...            5912   \n",
       "15  flüchtlinge griechenland hölle erden bitte mel...            6116   \n",
       "16  athen zehntausenden flüchtlingen asyl gewähren...           11103   \n",
       "17  video proteste aufnahme flüchtlingen türkei to...           19731   \n",
       "18  trotz rettungsmissionen mindestens flüchtlinge...           12129   \n",
       "19  hartz iv bezieher deutschland stammt syrien wa...            6421   \n",
       "20  kindergeld bitte melden cdu csu schärfer kontr...            3727   \n",
       "21  angela merkel pocht trotz widerstand flüchtlin...            5557   \n",
       "22  frankreich afghanische filmstars flüchtlinge t...           18054   \n",
       "23  bundestag beschließt maßnahmen effektivere abs...            7618   \n",
       "24  zehntausende müssen zurück honduras warum sehe...            6065   \n",
       "25  mehr millionen menschen flucht bitte melden fl...            3981   \n",
       "26  grenze geschlossen syrische flüchtlinge sitzen...           13139   \n",
       "27  mittelmeer italienische küstenwache rettet flü...           14593   \n",
       "28  niederlande mitverantwortlich niederlande trag...           13439   \n",
       "29  einspruch podcast juni constantin van lijnden ...            8770   \n",
       "30  zahl minderjährigen flüchtlinge steigt bitte m...            5995   \n",
       "31  erreichen teillösung streit flüchtlingsverteil...           16917   \n",
       "32  münchen minderjährige flüchtlinge radikalisier...           19517   \n",
       "33  leiter world food programme trumps plänen gesp...           13926   \n",
       "34  csu ringt antwort merkels warum sehe this argu...            4896   \n",
       "35  asylbewerber deutschland überfordert hilfe bit...            7778   \n",
       "36  griechenland flüchtlinge durchbrechen grenze m...           23101   \n",
       "37  integration junge flüchtlinge arbeiten deutsch...           18623   \n",
       "38  afd flüchtlinge libyen zurückschicken spitzenk...           14278   \n",
       "39  migranten arbeitsmarkt rational diskriminieren...            4577   \n",
       "40  flüchtlinge deutschland de maizière fordert ge...            8827   \n",
       "41  übergriffe flüchtlingsunterkünfte uhr weitersa...           37432   \n",
       "42  sachsen flüchtlinge rechte prügeln bautzen to ...           21313   \n",
       "43  wegen dreamers obama attackiert trump ressorts...           10425   \n",
       "44  einspruch podcast september corinna budras con...            1897   \n",
       "45  syrische flüchtlinge wettlauf winter bitte mel...           12252   \n",
       "46  freiwillige helfen kostenloses wlan flüchtling...           33499   \n",
       "47  langwierige protestaktion flüchtlinge marschie...           14379   \n",
       "48  frankreichs asylpolitik schwacher staat warum ...            9992   \n",
       "49  hanks welt wohin migranten warum sehe this arg...            1649   \n",
       "50  einwanderung de maizière kriminelle früher abs...            6747   \n",
       "51  flüchtlingskrise gewalt flüchtlinge schweden n...           32241   \n",
       "52  kinderehen eigene ordnung kommentar eigene ord...            9721   \n",
       "53  folge migration immer mehr kinder leben hartz ...            3838   \n",
       "54  bürgschaften flüchtlinge wiesbadener sollen za...            5028   \n",
       "55  asylbewerber bleiberecht reformiert bitte meld...           11169   \n",
       "56  neuankömmlinge deutschland de maizière zuzug f...           24324   \n",
       "57  abgelehnte asylbewerber schaffen abschiebung a...           23041   \n",
       "58  politiker fordern medizinische altersprüfung f...            8788   \n",
       "59  einspruch podcast dezember constantin van lijn...            2818   \n",
       "\n",
       "    negative words   overall  \n",
       "0             9510  positive  \n",
       "1             8069  positive  \n",
       "2            29230  positive  \n",
       "3            17372  positive  \n",
       "4             9099  positive  \n",
       "5             8128  positive  \n",
       "6             7377  positive  \n",
       "7            31666  positive  \n",
       "8             8076  positive  \n",
       "9             3291  positive  \n",
       "10            3195  positive  \n",
       "11            7768  positive  \n",
       "12           22611  positive  \n",
       "13            6311  positive  \n",
       "14            5111  positive  \n",
       "15            4512  positive  \n",
       "16            9906  positive  \n",
       "17           14439  positive  \n",
       "18            9453  positive  \n",
       "19            5296  positive  \n",
       "20            2629  positive  \n",
       "21            4783  positive  \n",
       "22           14362  positive  \n",
       "23            5631  positive  \n",
       "24            5324  positive  \n",
       "25            2604  positive  \n",
       "26           10250  positive  \n",
       "27           11150  positive  \n",
       "28           10018  positive  \n",
       "29           10486  negative  \n",
       "30            4484  positive  \n",
       "31           12869  positive  \n",
       "32           16354  positive  \n",
       "33           10246  positive  \n",
       "34            5459  negative  \n",
       "35            6200  positive  \n",
       "36           19319  positive  \n",
       "37           14595  positive  \n",
       "38           12375  positive  \n",
       "39            5302  negative  \n",
       "40            7476  positive  \n",
       "41           31393  positive  \n",
       "42           17029  positive  \n",
       "43            7538  positive  \n",
       "44            2267  negative  \n",
       "45           10088  positive  \n",
       "46           29727  positive  \n",
       "47           11627  positive  \n",
       "48            6920  positive  \n",
       "49            1836  negative  \n",
       "50            5199  positive  \n",
       "51           28268  positive  \n",
       "52            7116  positive  \n",
       "53            3647  positive  \n",
       "54            5654  negative  \n",
       "55            8988  positive  \n",
       "56           19431  positive  \n",
       "57           17779  positive  \n",
       "58            7164  positive  \n",
       "59            3453  negative  "
      ]
     },
     "execution_count": 92,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conditions = [\n",
    "(faz_rel_final['positive words'] > faz_rel_final['negative words']),\n",
    "(faz_rel_final['negative words'] > faz_rel_final['positive words']),\n",
    "(faz_rel_final['negative words'] == faz_rel_final['positive words'])\n",
    "]\n",
    "\n",
    "choices = [\n",
    "'positive',\n",
    "'negative',\n",
    "'neutral'\n",
    "]\n",
    "\n",
    "faz_rel_final['overall'] = np.select(conditions, choices, default = '')\n",
    "\n",
    "faz_rel_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>month_year</th>\n",
       "      <th>original_title_text</th>\n",
       "      <th>text_procssed_text</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall_sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Hartz-IV-Streit EU-Parlamentspräsident Schulz ...</td>\n",
       "      <td>schulz warnt brüssel bitte melden kategorische...</td>\n",
       "      <td>12562</td>\n",
       "      <td>9510</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nantes: Flüchtlinge besetzen französisches Pfa...</td>\n",
       "      <td>nantes flüchtlinge besetzen französisches pfar...</td>\n",
       "      <td>9263</td>\n",
       "      <td>8069</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Flüchtlingskrise: Mazedonien öffnet Grenze für...</td>\n",
       "      <td>flüchtlingskrise mazedonien öffnet grenze ausg...</td>\n",
       "      <td>35265</td>\n",
       "      <td>29230</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Asylbewerber: Opfer rechter Gewalt dürfen blei...</td>\n",
       "      <td>asylbewerber opfer rechter gewalt dürfen bleib...</td>\n",
       "      <td>23132</td>\n",
       "      <td>17372</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Amerika erwägt Trennung von Familien bei illeg...</td>\n",
       "      <td>amerika erwägt trennung familien illegaler ein...</td>\n",
       "      <td>11322</td>\n",
       "      <td>9099</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  month_year                                original_title_text  \\\n",
       "0     012014  Hartz-IV-Streit EU-Parlamentspräsident Schulz ...   \n",
       "1     012015  Nantes: Flüchtlinge besetzen französisches Pfa...   \n",
       "2     012016  Flüchtlingskrise: Mazedonien öffnet Grenze für...   \n",
       "3     012017  Asylbewerber: Opfer rechter Gewalt dürfen blei...   \n",
       "4     012018  Amerika erwägt Trennung von Familien bei illeg...   \n",
       "\n",
       "                                  text_procssed_text  positive words  \\\n",
       "0  schulz warnt brüssel bitte melden kategorische...           12562   \n",
       "1  nantes flüchtlinge besetzen französisches pfar...            9263   \n",
       "2  flüchtlingskrise mazedonien öffnet grenze ausg...           35265   \n",
       "3  asylbewerber opfer rechter gewalt dürfen bleib...           23132   \n",
       "4  amerika erwägt trennung familien illegaler ein...           11322   \n",
       "\n",
       "   negative words overall_sentiment  \n",
       "0            9510          positive  \n",
       "1            8069          positive  \n",
       "2           29230          positive  \n",
       "3           17372          positive  \n",
       "4            9099          positive  "
      ]
     },
     "execution_count": 93,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#clean data\n",
    "del faz_rel_final['nlpprocessed']\n",
    "faz_rel_final = faz_rel_final.rename(columns={'content': 'original_title_text', 'liststring': 'text_procssed_text',\"my\":\"month_year\",\"overall\":\"overall_sentiment\"})\n",
    "faz_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/faz_sentiment.csv\")\n",
    "faz_rel_final.head()\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## taz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "# TAZ\n",
    "\n",
    "taz = pd.read_csv(\"/Users/ashrakatelshehawy/taz_relevant-migrant-news.csv\", encoding='utf-8', delimiter='\\t',header=None,  error_bad_lines=False)\n",
    "\n",
    "#remove unnecessary columns\n",
    "taz_rel = taz.drop(taz.columns[[0,1,5,6,7,8]], axis=1)\n",
    "\n",
    "#give column names\n",
    "taz_rel.columns = ['date', 'title',\"content\"]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>title</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2373</th>\n",
       "      <td>20160914</td>\n",
       "      <td>Ungarn plant zweiten Grenzzaun</td>\n",
       "      <td>Der ungarische Ministerpräsident Victor Orban...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1990</th>\n",
       "      <td>20150814</td>\n",
       "      <td>Zelten in Deutschland</td>\n",
       "      <td>Auf beengtem Raum müssen hunderte Menschen le...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>20160306</td>\n",
       "      <td>Merkel will „Beschlüsse umsetzen“</td>\n",
       "      <td>Schutz der Außengrenze, Hilfe für Griechenlan...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2687</th>\n",
       "      <td>20151026</td>\n",
       "      <td>Rechte marschieren, Heim brennt</td>\n",
       "      <td>In Freiberg mussten am Sonntagabend 200 Poliz...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>632</th>\n",
       "      <td>20160208</td>\n",
       "      <td>Fast alle Maßnahmen sind denkbar</td>\n",
       "      <td>Das Sichern der EU-Außengrenze hat Priorität....</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          date                              title  \\\n",
       "2373  20160914     Ungarn plant zweiten Grenzzaun   \n",
       "1990  20150814              Zelten in Deutschland   \n",
       "195   20160306  Merkel will „Beschlüsse umsetzen“   \n",
       "2687  20151026    Rechte marschieren, Heim brennt   \n",
       "632   20160208   Fast alle Maßnahmen sind denkbar   \n",
       "\n",
       "                                                content  \n",
       "2373   Der ungarische Ministerpräsident Victor Orban...  \n",
       "1990   Auf beengtem Raum müssen hunderte Menschen le...  \n",
       "195    Schutz der Außengrenze, Hilfe für Griechenlan...  \n",
       "2687   In Freiberg mussten am Sonntagabend 200 Poliz...  \n",
       "632    Das Sichern der EU-Außengrenze hat Priorität....  "
      ]
     },
     "execution_count": 95,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#random sample of 25\n",
    "taz_rel_sample=taz_rel.sample(25)\n",
    "taz_rel_sample.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Mit Absicht zum Kentern gebracht?  Drei Frauen...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Schnellverfahren für Asylbewerber  Abschieben ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Mehr Anlass zur Abschiebung  Die Gründe für di...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Abschiebung aussetzen  Opfer rechter Gewalt in...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Asyl nicht nur im Ankunftsland  Ein syrischer ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content\n",
       "0  012014  Mit Absicht zum Kentern gebracht?  Drei Frauen...\n",
       "1  012015  Schnellverfahren für Asylbewerber  Abschieben ...\n",
       "2  012016  Mehr Anlass zur Abschiebung  Die Gründe für di...\n",
       "3  012017  Abschiebung aussetzen  Opfer rechter Gewalt in...\n",
       "4  012018  Asyl nicht nur im Ankunftsland  Ein syrischer ..."
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "# merge title and content in one variable - becaue we will use the whole text to produce the embeddings\n",
    "taz_rel[\"content\"]=taz_rel[\"title\"]+[\" \"]+taz_rel[\"content\"]\n",
    "#remove the title column\n",
    "taz_rel.drop(taz_rel.columns[1], axis=1, inplace=True)\n",
    "\n",
    "\n",
    "# lets remove the day because we dont need it\n",
    "taz_rel[\"date\"] = taz_rel[\"date\"].astype(str).str[:-2].astype(np.int64)\n",
    "taz_rel\n",
    "\n",
    "\n",
    "\n",
    "#extract last two digits to change months structure\n",
    "\n",
    "#first change to charachter\n",
    "taz_rel['date1'] = taz_rel['date'].apply(str)\n",
    "\n",
    "#new column with months and years\n",
    "taz_rel['month'] = taz_rel['date1'].str[4:6]\n",
    "taz_rel['year'] = taz_rel['date1'].str[0:4]\n",
    "\n",
    "\n",
    "\n",
    "#same date structure as the other datasets\n",
    "taz_rel[\"my\"]=taz_rel[\"month\"]+taz_rel[\"year\"]\n",
    "taz_rel\n",
    "\n",
    "#change month interger to month name\n",
    "\n",
    "#revert back to interger\n",
    "taz_rel['month'] = taz_rel['month'].apply(int)\n",
    "\n",
    "taz_rel2=taz_rel\n",
    "\n",
    "#use calendar to change month number to name\n",
    "taz_rel2['month'] = taz_rel2['month'].apply(lambda x: calendar.month_name[x])\n",
    "\n",
    "\n",
    "#have a consistent date variable for all datasets\n",
    "taz_rel2[\"month-year\"] = taz_rel2[\"month\"] +[\" \"]+ taz_rel2[\"year\"] \n",
    "\n",
    "\n",
    "#aggregate over months\n",
    "taz_rel2=taz_rel2.groupby(['my'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "taz_rel2.head()\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "#nlp\n",
    "taz_rel2[\"nlpprocessed\"]=taz_rel2['content'].apply(nlp_pipeline)\n",
    "\n",
    "\n",
    "#convert nlpprocessed column to string\n",
    "taz_rel2['liststring'] = [','.join(map(str, l)) for l in taz_rel2['nlpprocessed']]\n",
    "taz_rel2['liststring'] = (taz_rel2['liststring'].replace(',',' ', regex=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "taz_rel_final=taz_rel2\n",
    "taz_rel_final['positive words'] = taz_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "taz_rel_final['negative words'] = taz_rel_final['liststring'].str.count('|'.join(negative_list1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Mit Absicht zum Kentern gebracht?  Drei Frauen...</td>\n",
       "      <td>[absicht, kentern, gebracht, drei, frauen, neu...</td>\n",
       "      <td>absicht kentern gebracht drei frauen neun kind...</td>\n",
       "      <td>1293</td>\n",
       "      <td>994</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Schnellverfahren für Asylbewerber  Abschieben ...</td>\n",
       "      <td>[schnellverfahren, asylbewerber, abschieben, a...</td>\n",
       "      <td>schnellverfahren asylbewerber abschieben ausbi...</td>\n",
       "      <td>1527</td>\n",
       "      <td>1187</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Mehr Anlass zur Abschiebung  Die Gründe für di...</td>\n",
       "      <td>[mehr, anlass, abschiebung, gründe, ausweisung...</td>\n",
       "      <td>mehr anlass abschiebung gründe ausweisung stra...</td>\n",
       "      <td>8395</td>\n",
       "      <td>6580</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Abschiebung aussetzen  Opfer rechter Gewalt in...</td>\n",
       "      <td>[abschiebung, aussetzen, opfer, rechter, gewal...</td>\n",
       "      <td>abschiebung aussetzen opfer rechter gewalt bra...</td>\n",
       "      <td>3581</td>\n",
       "      <td>2746</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Asyl nicht nur im Ankunftsland  Ein syrischer ...</td>\n",
       "      <td>[asyl, ankunftsland, syrischer, staatsangehöri...</td>\n",
       "      <td>asyl ankunftsland syrischer staatsangehöriger ...</td>\n",
       "      <td>2688</td>\n",
       "      <td>2024</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>022014</td>\n",
       "      <td>taz.Info  Eine Veranstaltung zur Wahrnehmung, ...</td>\n",
       "      <td>[veranstaltung, wahrnehmung, realität, perspek...</td>\n",
       "      <td>veranstaltung wahrnehmung realität perspektive...</td>\n",
       "      <td>1121</td>\n",
       "      <td>992</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>022015</td>\n",
       "      <td>Schleuser müssen mit Haft rechnen  Fluchthelfe...</td>\n",
       "      <td>[schleuser, müssen, haft, rechnen, fluchthelfe...</td>\n",
       "      <td>schleuser müssen haft rechnen fluchthelfer rec...</td>\n",
       "      <td>2018</td>\n",
       "      <td>1717</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>022016</td>\n",
       "      <td>Kaum Schutz vor Gewalt  Der Missbrauchsbeauftr...</td>\n",
       "      <td>[kaum, schutz, gewalt, missbrauchsbeauftragte,...</td>\n",
       "      <td>kaum schutz gewalt missbrauchsbeauftragte röri...</td>\n",
       "      <td>8406</td>\n",
       "      <td>6907</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>022017</td>\n",
       "      <td>Hunderte Rohingya getötet  Ein Bericht der UNO...</td>\n",
       "      <td>[hunderte, rohingya, getötet, bericht, uno, do...</td>\n",
       "      <td>hunderte rohingya getötet bericht uno dokument...</td>\n",
       "      <td>2913</td>\n",
       "      <td>2220</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>022018</td>\n",
       "      <td>Sanktionen für Flüchtlingskritiker  Merkel for...</td>\n",
       "      <td>[sanktionen, flüchtlingskritiker, merkel, ford...</td>\n",
       "      <td>sanktionen flüchtlingskritiker merkel fordert ...</td>\n",
       "      <td>1465</td>\n",
       "      <td>1197</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>032014</td>\n",
       "      <td>Flüchtlinge diskutieren über Angebot  Integrat...</td>\n",
       "      <td>[flüchtlinge, diskutieren, angebot, integratio...</td>\n",
       "      <td>flüchtlinge diskutieren angebot integrationsse...</td>\n",
       "      <td>2036</td>\n",
       "      <td>1527</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>032015</td>\n",
       "      <td>Mehr Asylbewerber abgeschoben  Die Zahl der Fl...</td>\n",
       "      <td>[mehr, asylbewerber, abgeschoben, zahl, flücht...</td>\n",
       "      <td>mehr asylbewerber abgeschoben zahl flüchtlinge...</td>\n",
       "      <td>1044</td>\n",
       "      <td>699</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>032016</td>\n",
       "      <td>Der Deal ist laut Pro Asyl illegal  Die Türkei...</td>\n",
       "      <td>[deal, laut, pro, asyl, illegal, türkei, sei, ...</td>\n",
       "      <td>deal laut pro asyl illegal türkei sei sicherer...</td>\n",
       "      <td>6009</td>\n",
       "      <td>4844</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>032017</td>\n",
       "      <td>Gesetz scheitert  Maghreb-Staaten sollten als ...</td>\n",
       "      <td>[gesetz, scheitert, sollten, sicher, genug, ab...</td>\n",
       "      <td>gesetz scheitert sollten sicher genug abschieb...</td>\n",
       "      <td>2997</td>\n",
       "      <td>2161</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>032018</td>\n",
       "      <td>Abschiebung in Dauerschleife  Eine afghanische...</td>\n",
       "      <td>[abschiebung, dauerschleife, afghanische, fami...</td>\n",
       "      <td>abschiebung dauerschleife afghanische familie ...</td>\n",
       "      <td>1306</td>\n",
       "      <td>963</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>042014</td>\n",
       "      <td>Zehn Minuten Zeit sparen  Innenminister Thomas...</td>\n",
       "      <td>[zehn, minuten, zeit, sparen, innenminister, t...</td>\n",
       "      <td>zehn minuten zeit sparen innenminister thomas ...</td>\n",
       "      <td>1582</td>\n",
       "      <td>1434</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>042015</td>\n",
       "      <td>Sogar de Maizière gibt sich liberal  Außen- un...</td>\n",
       "      <td>[sogar, de, maizière, gibt, liberal, innenmini...</td>\n",
       "      <td>sogar de maizière gibt liberal innenminister d...</td>\n",
       "      <td>1676</td>\n",
       "      <td>1303</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>042016</td>\n",
       "      <td>EU-Kommission legt „Optionen“ vor  Wie weiter ...</td>\n",
       "      <td>[legt, optionen, dublin, rechte, asylsuchenden...</td>\n",
       "      <td>legt optionen dublin rechte asylsuchenden soll...</td>\n",
       "      <td>4571</td>\n",
       "      <td>4157</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>042017</td>\n",
       "      <td>UNO fordert Aussetzung von Dublin II  In Ungar...</td>\n",
       "      <td>[uno, fordert, aussetzung, dublin, ii, ungarn,...</td>\n",
       "      <td>uno fordert aussetzung dublin ii ungarn asylsu...</td>\n",
       "      <td>1138</td>\n",
       "      <td>875</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>042018</td>\n",
       "      <td>Familien-Nachzug für Flüchtlinge  Viele Flücht...</td>\n",
       "      <td>[flüchtlinge, viele, flüchtlinge, familie, deu...</td>\n",
       "      <td>flüchtlinge viele flüchtlinge familie deutschl...</td>\n",
       "      <td>1724</td>\n",
       "      <td>1134</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>052014</td>\n",
       "      <td>Marsch gegen Brüssel  Europäische Asylbewerber...</td>\n",
       "      <td>[marsch, brüssel, europäische, asylbewerber, g...</td>\n",
       "      <td>marsch brüssel europäische asylbewerber gemein...</td>\n",
       "      <td>1987</td>\n",
       "      <td>1633</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>052015</td>\n",
       "      <td>Asylverfahren behindern Integration  Lange Ver...</td>\n",
       "      <td>[asylverfahren, behindern, integration, lange,...</td>\n",
       "      <td>asylverfahren behindern integration lange verf...</td>\n",
       "      <td>1466</td>\n",
       "      <td>1210</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>052016</td>\n",
       "      <td>Bundespolizei wehrt sich gegen Kritik  Die Bun...</td>\n",
       "      <td>[bundespolizei, wehrt, kritik, bundespolizei, ...</td>\n",
       "      <td>bundespolizei wehrt kritik bundespolizei mehr ...</td>\n",
       "      <td>2483</td>\n",
       "      <td>2434</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>052017</td>\n",
       "      <td>Noch einen drauf  Die Regeln für den Umgang mi...</td>\n",
       "      <td>[drauf, regeln, umgang, asylsuchenden, verschä...</td>\n",
       "      <td>drauf regeln umgang asylsuchenden verschärft b...</td>\n",
       "      <td>1415</td>\n",
       "      <td>1112</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>052018</td>\n",
       "      <td>„Die Abschiebung ist rechtswidrig“  Die Abschi...</td>\n",
       "      <td>[abschiebung, rechtswidrig, abschiebung, verha...</td>\n",
       "      <td>abschiebung rechtswidrig abschiebung verhaftun...</td>\n",
       "      <td>2951</td>\n",
       "      <td>2025</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>062014</td>\n",
       "      <td>„Politik lebt von Symbolen“  Der FDP-Innenpoli...</td>\n",
       "      <td>[politik, lebt, symbolen, oetjen, niedersächsi...</td>\n",
       "      <td>politik lebt symbolen oetjen niedersächsische ...</td>\n",
       "      <td>2007</td>\n",
       "      <td>1543</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>062015</td>\n",
       "      <td>Nichts als Stückwerk  Pro Asyl kritisiert die ...</td>\n",
       "      <td>[stückwerk, pro, asyl, kritisiert, ergebnisse,...</td>\n",
       "      <td>stückwerk pro asyl kritisiert ergebnisse flüch...</td>\n",
       "      <td>3010</td>\n",
       "      <td>2465</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>062016</td>\n",
       "      <td>Rechtsstaatliche Skrupellosigkeit  Die Türkei ...</td>\n",
       "      <td>[rechtsstaatliche, skrupellosigkeit, türkei, f...</td>\n",
       "      <td>rechtsstaatliche skrupellosigkeit türkei fühlt...</td>\n",
       "      <td>3975</td>\n",
       "      <td>3374</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>062017</td>\n",
       "      <td>Brüssel geht gegen Verweigerer vor  Polen, Ung...</td>\n",
       "      <td>[brüssel, geht, verweigerer, polen, ungarn, ts...</td>\n",
       "      <td>brüssel geht verweigerer polen ungarn tschechi...</td>\n",
       "      <td>3071</td>\n",
       "      <td>2356</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>062018</td>\n",
       "      <td>Das Prinzip der Unsolidarität  Einige Länder w...</td>\n",
       "      <td>[prinzip, unsolidarität, länder, fordern, fair...</td>\n",
       "      <td>prinzip unsolidarität länder fordern faire umv...</td>\n",
       "      <td>4496</td>\n",
       "      <td>3159</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>072014</td>\n",
       "      <td>Die allerletzten in der Schlange  Der Senat ve...</td>\n",
       "      <td>[allerletzten, schlange, senat, verweigert, fl...</td>\n",
       "      <td>allerletzten schlange senat verweigert flüchtl...</td>\n",
       "      <td>2052</td>\n",
       "      <td>1722</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>072015</td>\n",
       "      <td>Aus „unsicheren“ Staaten  Flüchtlinge werden i...</td>\n",
       "      <td>[unsicheren, staaten, flüchtlinge, deutschland...</td>\n",
       "      <td>unsicheren staaten flüchtlinge deutschland unt...</td>\n",
       "      <td>3480</td>\n",
       "      <td>2718</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>072016</td>\n",
       "      <td>Eine Quote oder keine Quote?  Die ungarische R...</td>\n",
       "      <td>[quote, quote, ungarische, regierung, lehnt, q...</td>\n",
       "      <td>quote quote ungarische regierung lehnt quotier...</td>\n",
       "      <td>1476</td>\n",
       "      <td>1099</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>072017</td>\n",
       "      <td>Wie Angela Merkel Wahl-Kampf macht  Angela Mer...</td>\n",
       "      <td>[angela, merkel, macht, angela, merkel, endlic...</td>\n",
       "      <td>angela merkel macht angela merkel endlich sage...</td>\n",
       "      <td>2348</td>\n",
       "      <td>1695</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>072018</td>\n",
       "      <td>Seehofers Plan zur Flüchtlings-Politik  Der In...</td>\n",
       "      <td>[seehofers, plan, horst, seehofer, stellt, pla...</td>\n",
       "      <td>seehofers plan horst seehofer stellt plan weni...</td>\n",
       "      <td>3622</td>\n",
       "      <td>2271</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>082014</td>\n",
       "      <td>Streit um Aufnahme von Flüchtlingen  In Sachen...</td>\n",
       "      <td>[streit, aufnahme, flüchtlingen, sachen, human...</td>\n",
       "      <td>streit aufnahme flüchtlingen sachen humanitäre...</td>\n",
       "      <td>2011</td>\n",
       "      <td>1578</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>082015</td>\n",
       "      <td>Flüchtlingsprognose drastisch erhöht  Die Bund...</td>\n",
       "      <td>[flüchtlingsprognose, drastisch, erhöht, bunde...</td>\n",
       "      <td>flüchtlingsprognose drastisch erhöht bundesreg...</td>\n",
       "      <td>5588</td>\n",
       "      <td>4690</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>082016</td>\n",
       "      <td>Abschiebung? Nicht mal in Bayern.  Tübingens g...</td>\n",
       "      <td>[abschiebung, mal, bayern, tübingens, grüner, ...</td>\n",
       "      <td>abschiebung mal bayern tübingens grüner boris ...</td>\n",
       "      <td>3331</td>\n",
       "      <td>2921</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>082017</td>\n",
       "      <td>Keine Ausweisungen nach Ungarn  Seit dem 11. A...</td>\n",
       "      <td>[ausweisungen, ungarn, seit, april, wurden, ge...</td>\n",
       "      <td>ausweisungen ungarn seit april wurden geflücht...</td>\n",
       "      <td>1966</td>\n",
       "      <td>1560</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>082018</td>\n",
       "      <td>Ein Grundrecht als Zombie Heute ist der Schutz...</td>\n",
       "      <td>[grundrecht, zombie, heute, schutz, geflüchtet...</td>\n",
       "      <td>grundrecht zombie heute schutz geflüchtete höh...</td>\n",
       "      <td>2952</td>\n",
       "      <td>2191</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>092014</td>\n",
       "      <td>Von der Elbe führt kein Weg zu Ebola  Hamburg ...</td>\n",
       "      <td>[elbe, führt, ebola, hamburg, schiebt, vorerst...</td>\n",
       "      <td>elbe führt ebola hamburg schiebt vorerst flüch...</td>\n",
       "      <td>1791</td>\n",
       "      <td>1379</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>092015</td>\n",
       "      <td>„Können Asylstandards nicht halten“  Tübingens...</td>\n",
       "      <td>[asylstandards, halten, tübingens, oberbürgerm...</td>\n",
       "      <td>asylstandards halten tübingens oberbürgermeist...</td>\n",
       "      <td>6775</td>\n",
       "      <td>5187</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>092016</td>\n",
       "      <td>Niedersachsen schickt Syrer nach Griechenland ...</td>\n",
       "      <td>[niedersachsen, schickt, syrer, griechenland, ...</td>\n",
       "      <td>niedersachsen schickt syrer griechenland bunde...</td>\n",
       "      <td>3717</td>\n",
       "      <td>2987</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>092017</td>\n",
       "      <td>Gut zureden und drohen  Ungarn und Polen lehne...</td>\n",
       "      <td>[gut, zureden, drohen, ungarn, polen, lehnen, ...</td>\n",
       "      <td>gut zureden drohen ungarn polen lehnen flüchtl...</td>\n",
       "      <td>1499</td>\n",
       "      <td>1173</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>092018</td>\n",
       "      <td>Pauschal zur Bedrohung erklärt Neuer Schlag vo...</td>\n",
       "      <td>[pauschal, bedrohung, erklärt, neuer, schlag, ...</td>\n",
       "      <td>pauschal bedrohung erklärt neuer schlag italie...</td>\n",
       "      <td>1388</td>\n",
       "      <td>1043</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>102014</td>\n",
       "      <td>„Die Stimmung darf nicht kippen“  Dass Deutsch...</td>\n",
       "      <td>[stimmung, darf, kippen, deutschland, einwande...</td>\n",
       "      <td>stimmung darf kippen deutschland einwanderungs...</td>\n",
       "      <td>1804</td>\n",
       "      <td>1545</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>102015</td>\n",
       "      <td>BKA warnt vor rechtsextremer Gewalt  Die Siche...</td>\n",
       "      <td>[bka, warnt, rechtsextremer, gewalt, sicherhei...</td>\n",
       "      <td>bka warnt rechtsextremer gewalt sicherheitsbeh...</td>\n",
       "      <td>10299</td>\n",
       "      <td>7772</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>102016</td>\n",
       "      <td>Getreten und von Hunden gejagt  Amnesty Intern...</td>\n",
       "      <td>[getreten, hunden, gejagt, amnesty, internatio...</td>\n",
       "      <td>getreten hunden gejagt amnesty international k...</td>\n",
       "      <td>3572</td>\n",
       "      <td>2878</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>102017</td>\n",
       "      <td>Angriff auf Flüchtlingsheim  Drei betrunkene M...</td>\n",
       "      <td>[angriff, flüchtlingsheim, drei, betrunkene, m...</td>\n",
       "      <td>angriff flüchtlingsheim drei betrunkene männer...</td>\n",
       "      <td>1154</td>\n",
       "      <td>973</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>102018</td>\n",
       "      <td>Italien will Flüge nicht reinlassen Italiens I...</td>\n",
       "      <td>[italien, flüge, reinlassen, italiens, innenmi...</td>\n",
       "      <td>italien flüge reinlassen italiens innenministe...</td>\n",
       "      <td>1963</td>\n",
       "      <td>1285</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>112014</td>\n",
       "      <td>Sozialschmarotzer raus  Zugereisten EU-Ausländ...</td>\n",
       "      <td>[sozialschmarotzer, raus, zugereisten, zukünft...</td>\n",
       "      <td>sozialschmarotzer raus zugereisten zukünftig h...</td>\n",
       "      <td>1676</td>\n",
       "      <td>1387</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>112015</td>\n",
       "      <td>Das ist beschlossene Sache  Eilverfahren, Absc...</td>\n",
       "      <td>[beschlossene, sache, eilverfahren, abschiebun...</td>\n",
       "      <td>beschlossene sache eilverfahren abschiebungen ...</td>\n",
       "      <td>6989</td>\n",
       "      <td>5672</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>112016</td>\n",
       "      <td>Kein Asyl für syrische Bürgerkriegsflüchtlinge...</td>\n",
       "      <td>[asyl, syrische, bürgerkriegsflüchtlinge, schl...</td>\n",
       "      <td>asyl syrische bürgerkriegsflüchtlinge schleswi...</td>\n",
       "      <td>2758</td>\n",
       "      <td>2409</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>112017</td>\n",
       "      <td>Bewährungsstrafe für Brandanschlag  Weil er ei...</td>\n",
       "      <td>[bewährungsstrafe, brandanschlag, flüchtlingsh...</td>\n",
       "      <td>bewährungsstrafe brandanschlag flüchtlingsheim...</td>\n",
       "      <td>1485</td>\n",
       "      <td>1374</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>112018</td>\n",
       "      <td>Trump will Asylrecht verschärfen Donald Trump ...</td>\n",
       "      <td>[trump, asylrecht, verschärfen, donald, trump,...</td>\n",
       "      <td>trump asylrecht verschärfen donald trump setzt...</td>\n",
       "      <td>9363</td>\n",
       "      <td>4238</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>122014</td>\n",
       "      <td>Perspektive Arbeit  Spätestens ab dem sechsten...</td>\n",
       "      <td>[perspektive, arbeit, spätestens, ab, sechsten...</td>\n",
       "      <td>perspektive arbeit spätestens ab sechsten mona...</td>\n",
       "      <td>1533</td>\n",
       "      <td>1190</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>122015</td>\n",
       "      <td>Existenzminimum auch für EU-Bürger  Wer länger...</td>\n",
       "      <td>[existenzminimum, wer, länger, sechs, monate, ...</td>\n",
       "      <td>existenzminimum wer länger sechs monate deutsc...</td>\n",
       "      <td>5651</td>\n",
       "      <td>4552</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>122016</td>\n",
       "      <td>Einnahmequelle für Terroristen  Ob Heirat, Sex...</td>\n",
       "      <td>[einnahmequelle, terroristen, heirat, sexsklav...</td>\n",
       "      <td>einnahmequelle terroristen heirat sexsklaverei...</td>\n",
       "      <td>4473</td>\n",
       "      <td>3484</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>122017</td>\n",
       "      <td>Lasset die Kindlein zu mir kommen  Pro Asyl ap...</td>\n",
       "      <td>[lasset, kindlein, kommen, pro, asyl, appellie...</td>\n",
       "      <td>lasset kindlein kommen pro asyl appelliert bun...</td>\n",
       "      <td>1972</td>\n",
       "      <td>1583</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>122018</td>\n",
       "      <td>Baerbock als Boris Seit Jahren widmet sich die...</td>\n",
       "      <td>[baerbock, boris, seit, jahren, widmet, politi...</td>\n",
       "      <td>baerbock boris seit jahren widmet politik größ...</td>\n",
       "      <td>1928</td>\n",
       "      <td>1344</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content  \\\n",
       "0   012014  Mit Absicht zum Kentern gebracht?  Drei Frauen...   \n",
       "1   012015  Schnellverfahren für Asylbewerber  Abschieben ...   \n",
       "2   012016  Mehr Anlass zur Abschiebung  Die Gründe für di...   \n",
       "3   012017  Abschiebung aussetzen  Opfer rechter Gewalt in...   \n",
       "4   012018  Asyl nicht nur im Ankunftsland  Ein syrischer ...   \n",
       "5   022014  taz.Info  Eine Veranstaltung zur Wahrnehmung, ...   \n",
       "6   022015  Schleuser müssen mit Haft rechnen  Fluchthelfe...   \n",
       "7   022016  Kaum Schutz vor Gewalt  Der Missbrauchsbeauftr...   \n",
       "8   022017  Hunderte Rohingya getötet  Ein Bericht der UNO...   \n",
       "9   022018  Sanktionen für Flüchtlingskritiker  Merkel for...   \n",
       "10  032014  Flüchtlinge diskutieren über Angebot  Integrat...   \n",
       "11  032015  Mehr Asylbewerber abgeschoben  Die Zahl der Fl...   \n",
       "12  032016  Der Deal ist laut Pro Asyl illegal  Die Türkei...   \n",
       "13  032017  Gesetz scheitert  Maghreb-Staaten sollten als ...   \n",
       "14  032018  Abschiebung in Dauerschleife  Eine afghanische...   \n",
       "15  042014  Zehn Minuten Zeit sparen  Innenminister Thomas...   \n",
       "16  042015  Sogar de Maizière gibt sich liberal  Außen- un...   \n",
       "17  042016  EU-Kommission legt „Optionen“ vor  Wie weiter ...   \n",
       "18  042017  UNO fordert Aussetzung von Dublin II  In Ungar...   \n",
       "19  042018  Familien-Nachzug für Flüchtlinge  Viele Flücht...   \n",
       "20  052014  Marsch gegen Brüssel  Europäische Asylbewerber...   \n",
       "21  052015  Asylverfahren behindern Integration  Lange Ver...   \n",
       "22  052016  Bundespolizei wehrt sich gegen Kritik  Die Bun...   \n",
       "23  052017  Noch einen drauf  Die Regeln für den Umgang mi...   \n",
       "24  052018  „Die Abschiebung ist rechtswidrig“  Die Abschi...   \n",
       "25  062014  „Politik lebt von Symbolen“  Der FDP-Innenpoli...   \n",
       "26  062015  Nichts als Stückwerk  Pro Asyl kritisiert die ...   \n",
       "27  062016  Rechtsstaatliche Skrupellosigkeit  Die Türkei ...   \n",
       "28  062017  Brüssel geht gegen Verweigerer vor  Polen, Ung...   \n",
       "29  062018  Das Prinzip der Unsolidarität  Einige Länder w...   \n",
       "30  072014  Die allerletzten in der Schlange  Der Senat ve...   \n",
       "31  072015  Aus „unsicheren“ Staaten  Flüchtlinge werden i...   \n",
       "32  072016  Eine Quote oder keine Quote?  Die ungarische R...   \n",
       "33  072017  Wie Angela Merkel Wahl-Kampf macht  Angela Mer...   \n",
       "34  072018  Seehofers Plan zur Flüchtlings-Politik  Der In...   \n",
       "35  082014  Streit um Aufnahme von Flüchtlingen  In Sachen...   \n",
       "36  082015  Flüchtlingsprognose drastisch erhöht  Die Bund...   \n",
       "37  082016  Abschiebung? Nicht mal in Bayern.  Tübingens g...   \n",
       "38  082017  Keine Ausweisungen nach Ungarn  Seit dem 11. A...   \n",
       "39  082018  Ein Grundrecht als Zombie Heute ist der Schutz...   \n",
       "40  092014  Von der Elbe führt kein Weg zu Ebola  Hamburg ...   \n",
       "41  092015  „Können Asylstandards nicht halten“  Tübingens...   \n",
       "42  092016  Niedersachsen schickt Syrer nach Griechenland ...   \n",
       "43  092017  Gut zureden und drohen  Ungarn und Polen lehne...   \n",
       "44  092018  Pauschal zur Bedrohung erklärt Neuer Schlag vo...   \n",
       "45  102014  „Die Stimmung darf nicht kippen“  Dass Deutsch...   \n",
       "46  102015  BKA warnt vor rechtsextremer Gewalt  Die Siche...   \n",
       "47  102016  Getreten und von Hunden gejagt  Amnesty Intern...   \n",
       "48  102017  Angriff auf Flüchtlingsheim  Drei betrunkene M...   \n",
       "49  102018  Italien will Flüge nicht reinlassen Italiens I...   \n",
       "50  112014  Sozialschmarotzer raus  Zugereisten EU-Ausländ...   \n",
       "51  112015  Das ist beschlossene Sache  Eilverfahren, Absc...   \n",
       "52  112016  Kein Asyl für syrische Bürgerkriegsflüchtlinge...   \n",
       "53  112017  Bewährungsstrafe für Brandanschlag  Weil er ei...   \n",
       "54  112018  Trump will Asylrecht verschärfen Donald Trump ...   \n",
       "55  122014  Perspektive Arbeit  Spätestens ab dem sechsten...   \n",
       "56  122015  Existenzminimum auch für EU-Bürger  Wer länger...   \n",
       "57  122016  Einnahmequelle für Terroristen  Ob Heirat, Sex...   \n",
       "58  122017  Lasset die Kindlein zu mir kommen  Pro Asyl ap...   \n",
       "59  122018  Baerbock als Boris Seit Jahren widmet sich die...   \n",
       "\n",
       "                                         nlpprocessed  \\\n",
       "0   [absicht, kentern, gebracht, drei, frauen, neu...   \n",
       "1   [schnellverfahren, asylbewerber, abschieben, a...   \n",
       "2   [mehr, anlass, abschiebung, gründe, ausweisung...   \n",
       "3   [abschiebung, aussetzen, opfer, rechter, gewal...   \n",
       "4   [asyl, ankunftsland, syrischer, staatsangehöri...   \n",
       "5   [veranstaltung, wahrnehmung, realität, perspek...   \n",
       "6   [schleuser, müssen, haft, rechnen, fluchthelfe...   \n",
       "7   [kaum, schutz, gewalt, missbrauchsbeauftragte,...   \n",
       "8   [hunderte, rohingya, getötet, bericht, uno, do...   \n",
       "9   [sanktionen, flüchtlingskritiker, merkel, ford...   \n",
       "10  [flüchtlinge, diskutieren, angebot, integratio...   \n",
       "11  [mehr, asylbewerber, abgeschoben, zahl, flücht...   \n",
       "12  [deal, laut, pro, asyl, illegal, türkei, sei, ...   \n",
       "13  [gesetz, scheitert, sollten, sicher, genug, ab...   \n",
       "14  [abschiebung, dauerschleife, afghanische, fami...   \n",
       "15  [zehn, minuten, zeit, sparen, innenminister, t...   \n",
       "16  [sogar, de, maizière, gibt, liberal, innenmini...   \n",
       "17  [legt, optionen, dublin, rechte, asylsuchenden...   \n",
       "18  [uno, fordert, aussetzung, dublin, ii, ungarn,...   \n",
       "19  [flüchtlinge, viele, flüchtlinge, familie, deu...   \n",
       "20  [marsch, brüssel, europäische, asylbewerber, g...   \n",
       "21  [asylverfahren, behindern, integration, lange,...   \n",
       "22  [bundespolizei, wehrt, kritik, bundespolizei, ...   \n",
       "23  [drauf, regeln, umgang, asylsuchenden, verschä...   \n",
       "24  [abschiebung, rechtswidrig, abschiebung, verha...   \n",
       "25  [politik, lebt, symbolen, oetjen, niedersächsi...   \n",
       "26  [stückwerk, pro, asyl, kritisiert, ergebnisse,...   \n",
       "27  [rechtsstaatliche, skrupellosigkeit, türkei, f...   \n",
       "28  [brüssel, geht, verweigerer, polen, ungarn, ts...   \n",
       "29  [prinzip, unsolidarität, länder, fordern, fair...   \n",
       "30  [allerletzten, schlange, senat, verweigert, fl...   \n",
       "31  [unsicheren, staaten, flüchtlinge, deutschland...   \n",
       "32  [quote, quote, ungarische, regierung, lehnt, q...   \n",
       "33  [angela, merkel, macht, angela, merkel, endlic...   \n",
       "34  [seehofers, plan, horst, seehofer, stellt, pla...   \n",
       "35  [streit, aufnahme, flüchtlingen, sachen, human...   \n",
       "36  [flüchtlingsprognose, drastisch, erhöht, bunde...   \n",
       "37  [abschiebung, mal, bayern, tübingens, grüner, ...   \n",
       "38  [ausweisungen, ungarn, seit, april, wurden, ge...   \n",
       "39  [grundrecht, zombie, heute, schutz, geflüchtet...   \n",
       "40  [elbe, führt, ebola, hamburg, schiebt, vorerst...   \n",
       "41  [asylstandards, halten, tübingens, oberbürgerm...   \n",
       "42  [niedersachsen, schickt, syrer, griechenland, ...   \n",
       "43  [gut, zureden, drohen, ungarn, polen, lehnen, ...   \n",
       "44  [pauschal, bedrohung, erklärt, neuer, schlag, ...   \n",
       "45  [stimmung, darf, kippen, deutschland, einwande...   \n",
       "46  [bka, warnt, rechtsextremer, gewalt, sicherhei...   \n",
       "47  [getreten, hunden, gejagt, amnesty, internatio...   \n",
       "48  [angriff, flüchtlingsheim, drei, betrunkene, m...   \n",
       "49  [italien, flüge, reinlassen, italiens, innenmi...   \n",
       "50  [sozialschmarotzer, raus, zugereisten, zukünft...   \n",
       "51  [beschlossene, sache, eilverfahren, abschiebun...   \n",
       "52  [asyl, syrische, bürgerkriegsflüchtlinge, schl...   \n",
       "53  [bewährungsstrafe, brandanschlag, flüchtlingsh...   \n",
       "54  [trump, asylrecht, verschärfen, donald, trump,...   \n",
       "55  [perspektive, arbeit, spätestens, ab, sechsten...   \n",
       "56  [existenzminimum, wer, länger, sechs, monate, ...   \n",
       "57  [einnahmequelle, terroristen, heirat, sexsklav...   \n",
       "58  [lasset, kindlein, kommen, pro, asyl, appellie...   \n",
       "59  [baerbock, boris, seit, jahren, widmet, politi...   \n",
       "\n",
       "                                           liststring  positive words  \\\n",
       "0   absicht kentern gebracht drei frauen neun kind...            1293   \n",
       "1   schnellverfahren asylbewerber abschieben ausbi...            1527   \n",
       "2   mehr anlass abschiebung gründe ausweisung stra...            8395   \n",
       "3   abschiebung aussetzen opfer rechter gewalt bra...            3581   \n",
       "4   asyl ankunftsland syrischer staatsangehöriger ...            2688   \n",
       "5   veranstaltung wahrnehmung realität perspektive...            1121   \n",
       "6   schleuser müssen haft rechnen fluchthelfer rec...            2018   \n",
       "7   kaum schutz gewalt missbrauchsbeauftragte röri...            8406   \n",
       "8   hunderte rohingya getötet bericht uno dokument...            2913   \n",
       "9   sanktionen flüchtlingskritiker merkel fordert ...            1465   \n",
       "10  flüchtlinge diskutieren angebot integrationsse...            2036   \n",
       "11  mehr asylbewerber abgeschoben zahl flüchtlinge...            1044   \n",
       "12  deal laut pro asyl illegal türkei sei sicherer...            6009   \n",
       "13  gesetz scheitert sollten sicher genug abschieb...            2997   \n",
       "14  abschiebung dauerschleife afghanische familie ...            1306   \n",
       "15  zehn minuten zeit sparen innenminister thomas ...            1582   \n",
       "16  sogar de maizière gibt liberal innenminister d...            1676   \n",
       "17  legt optionen dublin rechte asylsuchenden soll...            4571   \n",
       "18  uno fordert aussetzung dublin ii ungarn asylsu...            1138   \n",
       "19  flüchtlinge viele flüchtlinge familie deutschl...            1724   \n",
       "20  marsch brüssel europäische asylbewerber gemein...            1987   \n",
       "21  asylverfahren behindern integration lange verf...            1466   \n",
       "22  bundespolizei wehrt kritik bundespolizei mehr ...            2483   \n",
       "23  drauf regeln umgang asylsuchenden verschärft b...            1415   \n",
       "24  abschiebung rechtswidrig abschiebung verhaftun...            2951   \n",
       "25  politik lebt symbolen oetjen niedersächsische ...            2007   \n",
       "26  stückwerk pro asyl kritisiert ergebnisse flüch...            3010   \n",
       "27  rechtsstaatliche skrupellosigkeit türkei fühlt...            3975   \n",
       "28  brüssel geht verweigerer polen ungarn tschechi...            3071   \n",
       "29  prinzip unsolidarität länder fordern faire umv...            4496   \n",
       "30  allerletzten schlange senat verweigert flüchtl...            2052   \n",
       "31  unsicheren staaten flüchtlinge deutschland unt...            3480   \n",
       "32  quote quote ungarische regierung lehnt quotier...            1476   \n",
       "33  angela merkel macht angela merkel endlich sage...            2348   \n",
       "34  seehofers plan horst seehofer stellt plan weni...            3622   \n",
       "35  streit aufnahme flüchtlingen sachen humanitäre...            2011   \n",
       "36  flüchtlingsprognose drastisch erhöht bundesreg...            5588   \n",
       "37  abschiebung mal bayern tübingens grüner boris ...            3331   \n",
       "38  ausweisungen ungarn seit april wurden geflücht...            1966   \n",
       "39  grundrecht zombie heute schutz geflüchtete höh...            2952   \n",
       "40  elbe führt ebola hamburg schiebt vorerst flüch...            1791   \n",
       "41  asylstandards halten tübingens oberbürgermeist...            6775   \n",
       "42  niedersachsen schickt syrer griechenland bunde...            3717   \n",
       "43  gut zureden drohen ungarn polen lehnen flüchtl...            1499   \n",
       "44  pauschal bedrohung erklärt neuer schlag italie...            1388   \n",
       "45  stimmung darf kippen deutschland einwanderungs...            1804   \n",
       "46  bka warnt rechtsextremer gewalt sicherheitsbeh...           10299   \n",
       "47  getreten hunden gejagt amnesty international k...            3572   \n",
       "48  angriff flüchtlingsheim drei betrunkene männer...            1154   \n",
       "49  italien flüge reinlassen italiens innenministe...            1963   \n",
       "50  sozialschmarotzer raus zugereisten zukünftig h...            1676   \n",
       "51  beschlossene sache eilverfahren abschiebungen ...            6989   \n",
       "52  asyl syrische bürgerkriegsflüchtlinge schleswi...            2758   \n",
       "53  bewährungsstrafe brandanschlag flüchtlingsheim...            1485   \n",
       "54  trump asylrecht verschärfen donald trump setzt...            9363   \n",
       "55  perspektive arbeit spätestens ab sechsten mona...            1533   \n",
       "56  existenzminimum wer länger sechs monate deutsc...            5651   \n",
       "57  einnahmequelle terroristen heirat sexsklaverei...            4473   \n",
       "58  lasset kindlein kommen pro asyl appelliert bun...            1972   \n",
       "59  baerbock boris seit jahren widmet politik größ...            1928   \n",
       "\n",
       "    negative words   overall  \n",
       "0              994  positive  \n",
       "1             1187  positive  \n",
       "2             6580  positive  \n",
       "3             2746  positive  \n",
       "4             2024  positive  \n",
       "5              992  positive  \n",
       "6             1717  positive  \n",
       "7             6907  positive  \n",
       "8             2220  positive  \n",
       "9             1197  positive  \n",
       "10            1527  positive  \n",
       "11             699  positive  \n",
       "12            4844  positive  \n",
       "13            2161  positive  \n",
       "14             963  positive  \n",
       "15            1434  positive  \n",
       "16            1303  positive  \n",
       "17            4157  positive  \n",
       "18             875  positive  \n",
       "19            1134  positive  \n",
       "20            1633  positive  \n",
       "21            1210  positive  \n",
       "22            2434  positive  \n",
       "23            1112  positive  \n",
       "24            2025  positive  \n",
       "25            1543  positive  \n",
       "26            2465  positive  \n",
       "27            3374  positive  \n",
       "28            2356  positive  \n",
       "29            3159  positive  \n",
       "30            1722  positive  \n",
       "31            2718  positive  \n",
       "32            1099  positive  \n",
       "33            1695  positive  \n",
       "34            2271  positive  \n",
       "35            1578  positive  \n",
       "36            4690  positive  \n",
       "37            2921  positive  \n",
       "38            1560  positive  \n",
       "39            2191  positive  \n",
       "40            1379  positive  \n",
       "41            5187  positive  \n",
       "42            2987  positive  \n",
       "43            1173  positive  \n",
       "44            1043  positive  \n",
       "45            1545  positive  \n",
       "46            7772  positive  \n",
       "47            2878  positive  \n",
       "48             973  positive  \n",
       "49            1285  positive  \n",
       "50            1387  positive  \n",
       "51            5672  positive  \n",
       "52            2409  positive  \n",
       "53            1374  positive  \n",
       "54            4238  positive  \n",
       "55            1190  positive  \n",
       "56            4552  positive  \n",
       "57            3484  positive  \n",
       "58            1583  positive  \n",
       "59            1344  positive  "
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label every row by comparing its two sentiment-word columns.\n",
    "# np.select uses the first condition that holds, so the order of\n",
    "# `conditions` has to line up with the order of `choices`.\n",
    "conditions = [\n",
    "    taz_rel_final['positive words'].gt(taz_rel_final['negative words']),\n",
    "    taz_rel_final['negative words'].gt(taz_rel_final['positive words']),\n",
    "    taz_rel_final['negative words'].eq(taz_rel_final['positive words']),\n",
    "]\n",
    "choices = ['positive', 'negative', 'neutral']\n",
    "\n",
    "# Rows that satisfy none of the conditions (e.g. a missing count) get ''.\n",
    "taz_rel_final['overall'] = np.select(conditions, choices, default='')\n",
    "\n",
    "taz_rel_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>month_year</th>\n",
       "      <th>original_title_text</th>\n",
       "      <th>text_procssed_text</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall_sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Mit Absicht zum Kentern gebracht?  Drei Frauen...</td>\n",
       "      <td>absicht kentern gebracht drei frauen neun kind...</td>\n",
       "      <td>1293</td>\n",
       "      <td>994</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>Schnellverfahren für Asylbewerber  Abschieben ...</td>\n",
       "      <td>schnellverfahren asylbewerber abschieben ausbi...</td>\n",
       "      <td>1527</td>\n",
       "      <td>1187</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Mehr Anlass zur Abschiebung  Die Gründe für di...</td>\n",
       "      <td>mehr anlass abschiebung gründe ausweisung stra...</td>\n",
       "      <td>8395</td>\n",
       "      <td>6580</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Abschiebung aussetzen  Opfer rechter Gewalt in...</td>\n",
       "      <td>abschiebung aussetzen opfer rechter gewalt bra...</td>\n",
       "      <td>3581</td>\n",
       "      <td>2746</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Asyl nicht nur im Ankunftsland  Ein syrischer ...</td>\n",
       "      <td>asyl ankunftsland syrischer staatsangehöriger ...</td>\n",
       "      <td>2688</td>\n",
       "      <td>2024</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  month_year                                original_title_text  \\\n",
       "0     012014  Mit Absicht zum Kentern gebracht?  Drei Frauen...   \n",
       "1     012015  Schnellverfahren für Asylbewerber  Abschieben ...   \n",
       "2     012016  Mehr Anlass zur Abschiebung  Die Gründe für di...   \n",
       "3     012017  Abschiebung aussetzen  Opfer rechter Gewalt in...   \n",
       "4     012018  Asyl nicht nur im Ankunftsland  Ein syrischer ...   \n",
       "\n",
       "                                  text_procssed_text  positive words  \\\n",
       "0  absicht kentern gebracht drei frauen neun kind...            1293   \n",
       "1  schnellverfahren asylbewerber abschieben ausbi...            1527   \n",
       "2  mehr anlass abschiebung gründe ausweisung stra...            8395   \n",
       "3  abschiebung aussetzen opfer rechter gewalt bra...            3581   \n",
       "4  asyl ankunftsland syrischer staatsangehöriger ...            2688   \n",
       "\n",
       "   negative words overall_sentiment  \n",
       "0             994          positive  \n",
       "1            1187          positive  \n",
       "2            6580          positive  \n",
       "3            2746          positive  \n",
       "4            2024          positive  "
      ]
     },
     "execution_count": 100,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#clean data\n",
    "del taz_rel_final['nlpprocessed']\n",
    "taz_rel_final = taz_rel_final.rename(columns={'content': 'original_title_text', 'liststring': 'text_procssed_text',\"my\":\"month_year\",\"overall\":\"overall_sentiment\"})\n",
    "taz_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/taz_sentiment.csv\")\n",
    "taz_rel_final.head()\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Welt\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
    "#welt\n",
    "\n",
    "\n",
    "welt = pd.read_csv(\"/Users/ashrakatelshehawy/welt_relevant-migrant-news.csv\", encoding='utf-8', delimiter='\\t',header=None,  error_bad_lines=False)\n",
    "\n",
    "#remove unnecessary columns\n",
    "welt_rel = welt.drop(welt.columns[[0,1,5,6,7,8]], axis=1)\n",
    "welt_rel\n",
    "\n",
    "\n",
    "\n",
    "#give column names\n",
    "welt_rel.columns = ['date', 'title',\"content\"]\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>title</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>153</th>\n",
       "      <td>20180102</td>\n",
       "      <td>Bayerns Innenminister: Herrmann will Alter von...</td>\n",
       "      <td>In der Debatte um eine medizinische Alterprüf...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4041</th>\n",
       "      <td>20160112</td>\n",
       "      <td>Soldaten helfen Flüchtlingen nur noch bis Sommer</td>\n",
       "      <td>Von der Leyen scheint entschlossen, 2016 heikl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3803</th>\n",
       "      <td>20151223</td>\n",
       "      <td>Diese Berufe sind in der Flüchtlingskrise gefragt</td>\n",
       "      <td>Islamische Religionslehrer, Schulpsychologen, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2350</th>\n",
       "      <td>20180915</td>\n",
       "      <td>Grenzsicherung: Mexikos unsichtbare Mauer</td>\n",
       "      <td>Quelle: REUTERS Ein Lkw-Reifen, ein paar als S...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>20151222</td>\n",
       "      <td>Es ist kein Wunder, dass alle nach Deutschland...</td>\n",
       "      <td>Ungarns Botschafter Györkös hat die Haltung se...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          date                                              title  \\\n",
       "153   20180102  Bayerns Innenminister: Herrmann will Alter von...   \n",
       "4041  20160112   Soldaten helfen Flüchtlingen nur noch bis Sommer   \n",
       "3803  20151223  Diese Berufe sind in der Flüchtlingskrise gefragt   \n",
       "2350  20180915          Grenzsicherung: Mexikos unsichtbare Mauer   \n",
       "198   20151222  Es ist kein Wunder, dass alle nach Deutschland...   \n",
       "\n",
       "                                                content  \n",
       "153    In der Debatte um eine medizinische Alterprüf...  \n",
       "4041  Von der Leyen scheint entschlossen, 2016 heikl...  \n",
       "3803  Islamische Religionslehrer, Schulpsychologen, ...  \n",
       "2350  Quelle: REUTERS Ein Lkw-Reifen, ein paar als S...  \n",
       "198   Ungarns Botschafter Györkös hat die Haltung se...  "
      ]
     },
     "execution_count": 102,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#sample of 25\n",
    "welt_rel_sample=welt_rel.sample(25)\n",
    "welt_rel_sample.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Göring-Eckhard kritisiert \"Ängsteschüren\" der ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>CSU will Asylbewerber in Schnellverfahren absc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>2299 Flüchtlinge klagen gegen das BAMF Tausend...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Präsidenten-Erlass: Trump verhängt Einreisesto...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Ausweisungsrecht: Warum jugendliche Straftäter...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content\n",
       "0  012014  Göring-Eckhard kritisiert \"Ängsteschüren\" der ...\n",
       "1  012015  CSU will Asylbewerber in Schnellverfahren absc...\n",
       "2  012016  2299 Flüchtlinge klagen gegen das BAMF Tausend...\n",
       "3  012017  Präsidenten-Erlass: Trump verhängt Einreisesto...\n",
       "4  012018  Ausweisungsrecht: Warum jugendliche Straftäter..."
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# merge title and content in one variable - becaue we will use the whole text to produce the embeddings\n",
    "welt_rel[\"content\"]=welt_rel[\"title\"]+[\" \"]+welt_rel[\"content\"]\n",
    "#remove the title column\n",
    "welt_rel.drop(welt_rel.columns[1], axis=1, inplace=True)\n",
    "\n",
    "\n",
    "# lets remove the day because we dont need it\n",
    "welt_rel[\"date\"] = welt_rel[\"date\"].astype(str).str[:-2].astype(np.int64)\n",
    "welt_rel\n",
    "\n",
    "\n",
    "\n",
    "#extract last two digits to change months structure\n",
    "\n",
    "#first change to charachter\n",
    "welt_rel['date1'] = welt_rel['date'].apply(str)\n",
    "\n",
    "#new column with months and years\n",
    "welt_rel['month'] = welt_rel['date1'].str[4:6]\n",
    "welt_rel['year'] = welt_rel['date1'].str[0:4]\n",
    "\n",
    "\n",
    "\n",
    "#same date structure as the other datasets\n",
    "welt_rel[\"my\"]=welt_rel[\"month\"]+welt_rel[\"year\"]\n",
    "welt_rel\n",
    "\n",
    "#change month interger to month name\n",
    "\n",
    "#revert back to interger\n",
    "welt_rel['month'] = welt_rel['month'].apply(int)\n",
    "\n",
    "welt_rel2=welt_rel\n",
    "\n",
    "#use calendar to change month number to name\n",
    "welt_rel2['month'] = welt_rel2['month'].apply(lambda x: calendar.month_name[x])\n",
    "\n",
    "\n",
    "#have a consistent date variable for all datasets\n",
    "welt_rel2[\"month-year\"] = welt_rel2[\"month\"] +[\" \"]+ welt_rel2[\"year\"] \n",
    "\n",
    "\n",
    "#aggregate over months\n",
    "welt_rel2=welt_rel2.groupby(['my'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "welt_rel2\n",
    "\n",
    "welt_rel2.head()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "#nlp\n",
    "welt_rel2[\"nlpprocessed\"]=welt_rel2['content'].apply(nlp_pipeline)\n",
    "\n",
    "\n",
    "#convert nlpprocessed column to string\n",
    "welt_rel2['liststring'] = [','.join(map(str, l)) for l in welt_rel2['nlpprocessed']]\n",
    "welt_rel2['liststring'] = (welt_rel2['liststring'].replace(',',' ', regex=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "welt_rel_final=welt_rel2\n",
    "welt_rel_final['positive words'] = welt_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "welt_rel_final['negative words'] = welt_rel_final['liststring'].str.count('|'.join(negative_list1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Göring-Eckhard kritisiert \"Ängsteschüren\" der ...</td>\n",
       "      <td>[kritisiert, ängsteschüren, union, bitte, such...</td>\n",
       "      <td>kritisiert ängsteschüren union bitte suchen ka...</td>\n",
       "      <td>1367</td>\n",
       "      <td>863</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>CSU will Asylbewerber in Schnellverfahren absc...</td>\n",
       "      <td>[csu, asylbewerber, schnellverfahren, abschieb...</td>\n",
       "      <td>csu asylbewerber schnellverfahren abschieben b...</td>\n",
       "      <td>2262</td>\n",
       "      <td>1761</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>2299 Flüchtlinge klagen gegen das BAMF Tausend...</td>\n",
       "      <td>[flüchtlinge, klagen, bamf, tausende, flüchtli...</td>\n",
       "      <td>flüchtlinge klagen bamf tausende flüchtlinge e...</td>\n",
       "      <td>10625</td>\n",
       "      <td>8139</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Präsidenten-Erlass: Trump verhängt Einreisesto...</td>\n",
       "      <td>[trump, verhängt, einreisestopp, viele, muslim...</td>\n",
       "      <td>trump verhängt einreisestopp viele muslime que...</td>\n",
       "      <td>5231</td>\n",
       "      <td>3452</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Ausweisungsrecht: Warum jugendliche Straftäter...</td>\n",
       "      <td>[ausweisungsrecht, warum, jugendliche, straftä...</td>\n",
       "      <td>ausweisungsrecht warum jugendliche straftäter ...</td>\n",
       "      <td>6188</td>\n",
       "      <td>4090</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content  \\\n",
       "0  012014  Göring-Eckhard kritisiert \"Ängsteschüren\" der ...   \n",
       "1  012015  CSU will Asylbewerber in Schnellverfahren absc...   \n",
       "2  012016  2299 Flüchtlinge klagen gegen das BAMF Tausend...   \n",
       "3  012017  Präsidenten-Erlass: Trump verhängt Einreisesto...   \n",
       "4  012018  Ausweisungsrecht: Warum jugendliche Straftäter...   \n",
       "\n",
       "                                        nlpprocessed  \\\n",
       "0  [kritisiert, ängsteschüren, union, bitte, such...   \n",
       "1  [csu, asylbewerber, schnellverfahren, abschieb...   \n",
       "2  [flüchtlinge, klagen, bamf, tausende, flüchtli...   \n",
       "3  [trump, verhängt, einreisestopp, viele, muslim...   \n",
       "4  [ausweisungsrecht, warum, jugendliche, straftä...   \n",
       "\n",
       "                                          liststring  positive words  \\\n",
       "0  kritisiert ängsteschüren union bitte suchen ka...            1367   \n",
       "1  csu asylbewerber schnellverfahren abschieben b...            2262   \n",
       "2  flüchtlinge klagen bamf tausende flüchtlinge e...           10625   \n",
       "3  trump verhängt einreisestopp viele muslime que...            5231   \n",
       "4  ausweisungsrecht warum jugendliche straftäter ...            6188   \n",
       "\n",
       "   negative words   overall  \n",
       "0             863  positive  \n",
       "1            1761  positive  \n",
       "2            8139  positive  \n",
       "3            3452  positive  \n",
       "4            4090  positive  "
      ]
     },
     "execution_count": 106,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conditions = [\n",
    "(welt_rel_final['positive words'] > welt_rel_final['negative words']),\n",
    "(welt_rel_final['negative words'] > welt_rel_final['positive words']),\n",
    "(welt_rel_final['negative words'] == welt_rel_final['positive words'])\n",
    "]\n",
    "\n",
    "choices = [\n",
    "'positive',\n",
    "'negative',\n",
    "'neutral'\n",
    "]\n",
    "\n",
    "welt_rel_final['overall'] = np.select(conditions, choices, default = '')\n",
    "\n",
    "welt_rel_final.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>month_year</th>\n",
       "      <th>original_title_text</th>\n",
       "      <th>text_procssed_text</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall_sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Göring-Eckhard kritisiert \"Ängsteschüren\" der ...</td>\n",
       "      <td>kritisiert ängsteschüren union bitte suchen ka...</td>\n",
       "      <td>1367</td>\n",
       "      <td>863</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>CSU will Asylbewerber in Schnellverfahren absc...</td>\n",
       "      <td>csu asylbewerber schnellverfahren abschieben b...</td>\n",
       "      <td>2262</td>\n",
       "      <td>1761</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>2299 Flüchtlinge klagen gegen das BAMF Tausend...</td>\n",
       "      <td>flüchtlinge klagen bamf tausende flüchtlinge e...</td>\n",
       "      <td>10625</td>\n",
       "      <td>8139</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Präsidenten-Erlass: Trump verhängt Einreisesto...</td>\n",
       "      <td>trump verhängt einreisestopp viele muslime que...</td>\n",
       "      <td>5231</td>\n",
       "      <td>3452</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Ausweisungsrecht: Warum jugendliche Straftäter...</td>\n",
       "      <td>ausweisungsrecht warum jugendliche straftäter ...</td>\n",
       "      <td>6188</td>\n",
       "      <td>4090</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  month_year                                original_title_text  \\\n",
       "0     012014  Göring-Eckhard kritisiert \"Ängsteschüren\" der ...   \n",
       "1     012015  CSU will Asylbewerber in Schnellverfahren absc...   \n",
       "2     012016  2299 Flüchtlinge klagen gegen das BAMF Tausend...   \n",
       "3     012017  Präsidenten-Erlass: Trump verhängt Einreisesto...   \n",
       "4     012018  Ausweisungsrecht: Warum jugendliche Straftäter...   \n",
       "\n",
       "                                  text_procssed_text  positive words  \\\n",
       "0  kritisiert ängsteschüren union bitte suchen ka...            1367   \n",
       "1  csu asylbewerber schnellverfahren abschieben b...            2262   \n",
       "2  flüchtlinge klagen bamf tausende flüchtlinge e...           10625   \n",
       "3  trump verhängt einreisestopp viele muslime que...            5231   \n",
       "4  ausweisungsrecht warum jugendliche straftäter ...            6188   \n",
       "\n",
       "   negative words overall_sentiment  \n",
       "0             863          positive  \n",
       "1            1761          positive  \n",
       "2            8139          positive  \n",
       "3            3452          positive  \n",
       "4            4090          positive  "
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#clean data\n",
    "del welt_rel_final['nlpprocessed']\n",
    "welt_rel_final = welt_rel_final.rename(columns={'content': 'original_title_text', 'liststring': 'text_procssed_text',\"my\":\"month_year\",\"overall\":\"overall_sentiment\"})\n",
    "welt_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/welt_sentiment.csv\")\n",
    "welt_rel_final.head()\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Sueddeustche"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
    "sueddeutsche = pd.read_csv(\"/Users/ashrakatelshehawy/sueddeutsche_relevant-migrant-news.csv\", encoding='utf-8', delimiter='\\t',header=None,  error_bad_lines=False)\n",
    "#remove unnecessary columns\n",
    "sueddeutsche_rel = sueddeutsche.drop(welt.columns[[0,1,5,6,7,8]], axis=1)\n",
    "\n",
    "#give column names\n",
    "sueddeutsche_rel.columns = ['date', 'title',\"content\"]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>title</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>610</th>\n",
       "      <td>20140202</td>\n",
       "      <td>UN: \"Syrische Flüchtlingskrise ist kolossale T...</td>\n",
       "      <td>Anzeige Anzeige Anzeige Anzeige       Mindeste...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>663</th>\n",
       "      <td>20180505</td>\n",
       "      <td>USA: 57 000 Menschen aus Honduras verlieren Bl...</td>\n",
       "      <td>Sie haben Ihren Adblocker auf unserer Seite ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1532</th>\n",
       "      <td>20180703</td>\n",
       "      <td>Migration - Stuttgart</td>\n",
       "      <td>Sie haben Ihren Adblocker auf unserer Seite ak...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1672</th>\n",
       "      <td>20181130</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Hoffnungslos Tausende Flüchtlinge sitzen im me...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2071</th>\n",
       "      <td>20150712</td>\n",
       "      <td>Sea Watch - Logbuch einer beklemmenden Mission</td>\n",
       "      <td>Immobilien und Wohnungen kaufen, mieten, anbie...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          date                                              title  \\\n",
       "610   20140202  UN: \"Syrische Flüchtlingskrise ist kolossale T...   \n",
       "663   20180505  USA: 57 000 Menschen aus Honduras verlieren Bl...   \n",
       "1532  20180703                              Migration - Stuttgart   \n",
       "1672  20181130                                                NaN   \n",
       "2071  20150712     Sea Watch - Logbuch einer beklemmenden Mission   \n",
       "\n",
       "                                                content  \n",
       "610   Anzeige Anzeige Anzeige Anzeige       Mindeste...  \n",
       "663   Sie haben Ihren Adblocker auf unserer Seite ak...  \n",
       "1532  Sie haben Ihren Adblocker auf unserer Seite ak...  \n",
       "1672  Hoffnungslos Tausende Flüchtlinge sitzen im me...  \n",
       "2071  Immobilien und Wohnungen kaufen, mieten, anbie...  "
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#random sample of 25\n",
    "sueddeutsche_rel_sample=sueddeutsche_rel.sample(25)\n",
    "sueddeutsche_rel_sample.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [],
   "source": [
    "sueddeutsche_rel['content'] =sueddeutsche_rel[['title','content']].astype(str).apply(' '.join,1)\n",
    "\n",
    "#remove the title column\n",
    "sueddeutsche_rel.drop(sueddeutsche_rel.columns[1], axis=1, inplace=True)\n",
    "\n",
    "\n",
    "# lets remove the day because we dont need it\n",
    "sueddeutsche_rel[\"date\"] = sueddeutsche_rel[\"date\"].astype(str).str[:-2].astype(np.int64)\n",
    "\n",
    "#extract last two digits to change months structure\n",
    "\n",
    "#first change to charachter\n",
    "sueddeutsche_rel['date1'] = sueddeutsche_rel['date'].apply(str)\n",
    "\n",
    "#new column with months and years\n",
    "sueddeutsche_rel['month'] = sueddeutsche_rel['date1'].str[4:6]\n",
    "sueddeutsche_rel['year'] = sueddeutsche_rel['date1'].str[0:4]\n",
    "\n",
    "\n",
    "\n",
    "#same date structure as the other datasets\n",
    "sueddeutsche_rel[\"my\"]=sueddeutsche_rel[\"month\"]+sueddeutsche_rel[\"year\"]\n",
    "sueddeutsche_rel\n",
    "\n",
    "#change month interger to month name\n",
    "\n",
    "#revert back to interger\n",
    "sueddeutsche_rel['month'] = sueddeutsche_rel['month'].apply(int)\n",
    "\n",
    "sueddeutsche_rel2=sueddeutsche_rel\n",
    "\n",
    "#use calendar to change month number to name\n",
    "sueddeutsche_rel2['month'] = sueddeutsche_rel2['month'].apply(lambda x: calendar.month_name[x])\n",
    "\n",
    "\n",
    "#have a consistent date variable for all datasets\n",
    "sueddeutsche_rel2[\"month-year\"] = sueddeutsche_rel2[\"month\"] +[\" \"]+ sueddeutsche_rel2[\"year\"] \n",
    "\n",
    "\n",
    "#aggregate over months\n",
    "sueddeutsche_rel2=sueddeutsche_rel2.groupby(['my'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [],
   "source": [
    "#nlp\n",
    "sueddeutsche_rel2[\"nlpprocessed\"]=sueddeutsche_rel2['content'].apply(nlp_pipeline)\n",
    "\n",
    "\n",
    "#convert nlpprocessed column to string\n",
    "sueddeutsche_rel2['liststring'] = [','.join(map(str, l)) for l in sueddeutsche_rel2['nlpprocessed']]\n",
    "sueddeutsche_rel2['liststring'] = (sueddeutsche_rel2['liststring'].replace(',',' ', regex=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [],
   "source": [
    "sueddeutsche_rel_final=sueddeutsche_rel2\n",
    "sueddeutsche_rel_final['positive words'] = sueddeutsche_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "sueddeutsche_rel_final['negative words'] = sueddeutsche_rel_final['liststring'].str.count('|'.join(negative_list1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Flüchtlingspolitik Anzeige Anzeige Anzeige Zeh...</td>\n",
       "      <td>[flüchtlingspolitik, anzeige, anzeige, anzeige...</td>\n",
       "      <td>flüchtlingspolitik anzeige anzeige anzeige zeh...</td>\n",
       "      <td>2177</td>\n",
       "      <td>1751</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>USA: Höchstes Gericht entscheidet über Homo-Eh...</td>\n",
       "      <td>[usa, höchstes, gericht, entscheidet, anzeige,...</td>\n",
       "      <td>usa höchstes gericht entscheidet anzeige anzei...</td>\n",
       "      <td>1499</td>\n",
       "      <td>1414</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Übergriffe in Köln - Maas und de Maizière erle...</td>\n",
       "      <td>[übergriffe, köln, maas, de, maizière, erleich...</td>\n",
       "      <td>übergriffe köln maas de maizière erleichtern a...</td>\n",
       "      <td>9966</td>\n",
       "      <td>7412</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Deutlich mehr Flüchtlinge holen Familien nach ...</td>\n",
       "      <td>[deutlich, mehr, flüchtlinge, holen, familien,...</td>\n",
       "      <td>deutlich mehr flüchtlinge holen familien adblo...</td>\n",
       "      <td>2482</td>\n",
       "      <td>1715</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Flüchtlingspolitik - Integration in Gefahr Sie...</td>\n",
       "      <td>[flüchtlingspolitik, integration, gefahr, adbl...</td>\n",
       "      <td>flüchtlingspolitik integration gefahr adblocke...</td>\n",
       "      <td>2193</td>\n",
       "      <td>1304</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content  \\\n",
       "0  012014  Flüchtlingspolitik Anzeige Anzeige Anzeige Zeh...   \n",
       "1  012015  USA: Höchstes Gericht entscheidet über Homo-Eh...   \n",
       "2  012016  Übergriffe in Köln - Maas und de Maizière erle...   \n",
       "3  012017  Deutlich mehr Flüchtlinge holen Familien nach ...   \n",
       "4  012018  Flüchtlingspolitik - Integration in Gefahr Sie...   \n",
       "\n",
       "                                        nlpprocessed  \\\n",
       "0  [flüchtlingspolitik, anzeige, anzeige, anzeige...   \n",
       "1  [usa, höchstes, gericht, entscheidet, anzeige,...   \n",
       "2  [übergriffe, köln, maas, de, maizière, erleich...   \n",
       "3  [deutlich, mehr, flüchtlinge, holen, familien,...   \n",
       "4  [flüchtlingspolitik, integration, gefahr, adbl...   \n",
       "\n",
       "                                          liststring  positive words  \\\n",
       "0  flüchtlingspolitik anzeige anzeige anzeige zeh...            2177   \n",
       "1  usa höchstes gericht entscheidet anzeige anzei...            1499   \n",
       "2  übergriffe köln maas de maizière erleichtern a...            9966   \n",
       "3  deutlich mehr flüchtlinge holen familien adblo...            2482   \n",
       "4  flüchtlingspolitik integration gefahr adblocke...            2193   \n",
       "\n",
       "   negative words   overall  \n",
       "0            1751  positive  \n",
       "1            1414  positive  \n",
       "2            7412  positive  \n",
       "3            1715  positive  \n",
       "4            1304  positive  "
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conditions = [\n",
    "(sueddeutsche_rel_final['positive words'] > sueddeutsche_rel_final['negative words']),\n",
    "(sueddeutsche_rel_final['negative words'] > sueddeutsche_rel_final['positive words']),\n",
    "(sueddeutsche_rel_final['negative words'] == sueddeutsche_rel_final['positive words'])\n",
    "]\n",
    "\n",
    "choices = [\n",
    "'positive',\n",
    "'negative',\n",
    "'neutral'\n",
    "]\n",
    "\n",
    "sueddeutsche_rel_final['overall'] = np.select(conditions, choices, default = '')\n",
    "\n",
    "sueddeutsche_rel_final.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>month_year</th>\n",
       "      <th>original_title_text</th>\n",
       "      <th>text_procssed_text</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall_sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012014</td>\n",
       "      <td>Flüchtlingspolitik Anzeige Anzeige Anzeige Zeh...</td>\n",
       "      <td>flüchtlingspolitik anzeige anzeige anzeige zeh...</td>\n",
       "      <td>2177</td>\n",
       "      <td>1751</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012015</td>\n",
       "      <td>USA: Höchstes Gericht entscheidet über Homo-Eh...</td>\n",
       "      <td>usa höchstes gericht entscheidet anzeige anzei...</td>\n",
       "      <td>1499</td>\n",
       "      <td>1414</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012016</td>\n",
       "      <td>Übergriffe in Köln - Maas und de Maizière erle...</td>\n",
       "      <td>übergriffe köln maas de maizière erleichtern a...</td>\n",
       "      <td>9966</td>\n",
       "      <td>7412</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012017</td>\n",
       "      <td>Deutlich mehr Flüchtlinge holen Familien nach ...</td>\n",
       "      <td>deutlich mehr flüchtlinge holen familien adblo...</td>\n",
       "      <td>2482</td>\n",
       "      <td>1715</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012018</td>\n",
       "      <td>Flüchtlingspolitik - Integration in Gefahr Sie...</td>\n",
       "      <td>flüchtlingspolitik integration gefahr adblocke...</td>\n",
       "      <td>2193</td>\n",
       "      <td>1304</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  month_year                                original_title_text  \\\n",
       "0     012014  Flüchtlingspolitik Anzeige Anzeige Anzeige Zeh...   \n",
       "1     012015  USA: Höchstes Gericht entscheidet über Homo-Eh...   \n",
       "2     012016  Übergriffe in Köln - Maas und de Maizière erle...   \n",
       "3     012017  Deutlich mehr Flüchtlinge holen Familien nach ...   \n",
       "4     012018  Flüchtlingspolitik - Integration in Gefahr Sie...   \n",
       "\n",
       "                                  text_procssed_text  positive words  \\\n",
       "0  flüchtlingspolitik anzeige anzeige anzeige zeh...            2177   \n",
       "1  usa höchstes gericht entscheidet anzeige anzei...            1499   \n",
       "2  übergriffe köln maas de maizière erleichtern a...            9966   \n",
       "3  deutlich mehr flüchtlinge holen familien adblo...            2482   \n",
       "4  flüchtlingspolitik integration gefahr adblocke...            2193   \n",
       "\n",
       "   negative words overall_sentiment  \n",
       "0            1751          positive  \n",
       "1            1414          positive  \n",
       "2            7412          positive  \n",
       "3            1715          positive  \n",
       "4            1304          positive  "
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#clean data\n",
    "del sueddeutsche_rel_final['nlpprocessed']\n",
    "sueddeutsche_rel_final = sueddeutsche_rel_final.rename(columns={'content': 'original_title_text', 'liststring': 'text_procssed_text',\"my\":\"month_year\",\"overall\":\"overall_sentiment\"})\n",
    "sueddeutsche_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/sueddeutsche_sentiment.csv\")\n",
    "sueddeutsche_rel_final.head()\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sputnik"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>title</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20160107</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "      <td>Der Satz „Wir schaffen das“ der Bundeskanzleri...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20150210</td>\n",
       "      <td>Experte: Rund 1,3 Millionen wehrpflichtige Ukr...</td>\n",
       "      <td>In letzter Zeit sei knapp einer halben Million...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20181117</td>\n",
       "      <td>So präzisiert Nahles ihre Idee für Hartz IV-Al...</td>\n",
       "      <td>{                    \\vars\\\": {               ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20181109</td>\n",
       "      <td>Trump beschränkt drastisch Asylgewährung für i...</td>\n",
       "      <td>{                    \\vars\\\": {               ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20160306</td>\n",
       "      <td>Österreich drängt auf deutsches Flüchtlingskon...</td>\n",
       "      <td>{                    \\vars\\\": {               ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       date                                              title  \\\n",
       "0  20160107  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...   \n",
       "1  20150210  Experte: Rund 1,3 Millionen wehrpflichtige Ukr...   \n",
       "2  20181117  So präzisiert Nahles ihre Idee für Hartz IV-Al...   \n",
       "3  20181109  Trump beschränkt drastisch Asylgewährung für i...   \n",
       "4  20160306  Österreich drängt auf deutsches Flüchtlingskon...   \n",
       "\n",
       "                                             content  \n",
       "0  Der Satz „Wir schaffen das“ der Bundeskanzleri...  \n",
       "1  In letzter Zeit sei knapp einer halben Million...  \n",
       "2  {                    \\vars\\\": {               ...  \n",
       "3  {                    \\vars\\\": {               ...  \n",
       "4  {                    \\vars\\\": {               ...  "
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Note: we noticed some dates were scraped wrongly and we have fixed them manually\n",
    "\n",
    "#sputik\n",
    "sputnik_rel = pd.read_csv(\"/Users/ashrakatelshehawy/sputnik_relevant-migrant-news.csv\", encoding='utf-8', delimiter='\\t',header=None)\n",
    "\n",
    "#remove unnecessary sputnik columns\n",
    "sputnik_rel = sputnik_rel.drop(sputnik_rel.columns[[0,1,5,6,7,8]], axis=1)\n",
    "\n",
    "#give column names\n",
    "sputnik_rel.columns = ['date', 'title',\"content\"]\n",
    "\n",
    "sputnik_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "#random sample of 25\n",
    "#sputnik_rel_sample=sputnik_rel.sample(25)\n",
    "#sputnik_rel_sample.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20160107</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20150210</td>\n",
       "      <td>Experte: Rund 1,3 Millionen wehrpflichtige Ukr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20181117</td>\n",
       "      <td>So präzisiert Nahles ihre Idee für Hartz IV-Al...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20181109</td>\n",
       "      <td>Trump beschränkt drastisch Asylgewährung für i...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20160306</td>\n",
       "      <td>Österreich drängt auf deutsches Flüchtlingskon...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       date                                            content\n",
       "0  20160107  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...\n",
       "1  20150210  Experte: Rund 1,3 Millionen wehrpflichtige Ukr...\n",
       "2  20181117  So präzisiert Nahles ihre Idee für Hartz IV-Al...\n",
       "3  20181109  Trump beschränkt drastisch Asylgewährung für i...\n",
       "4  20160306  Österreich drängt auf deutsches Flüchtlingskon..."
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# merge title and content in one variable - becaue we will use the whole text to produce the embeddings\n",
    "sputnik_rel[\"content\"]=sputnik_rel[\"title\"]+[\" \"]+sputnik_rel[\"content\"]\n",
    "\n",
    "#remove the title column\n",
    "sputnik_rel.drop(sputnik_rel.columns[1], axis=1, inplace=True)\n",
    "\n",
    "\n",
    "sputnik_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201601</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201502</td>\n",
       "      <td>Experte: Rund 1,3 Millionen wehrpflichtige Ukr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201811</td>\n",
       "      <td>So präzisiert Nahles ihre Idee für Hartz IV-Al...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201811</td>\n",
       "      <td>Trump beschränkt drastisch Asylgewährung für i...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201603</td>\n",
       "      <td>Österreich drängt auf deutsches Flüchtlingskon...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content\n",
       "0  201601  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...\n",
       "1  201502  Experte: Rund 1,3 Millionen wehrpflichtige Ukr...\n",
       "2  201811  So präzisiert Nahles ihre Idee für Hartz IV-Al...\n",
       "3  201811  Trump beschränkt drastisch Asylgewährung für i...\n",
       "4  201603  Österreich drängt auf deutsches Flüchtlingskon..."
      ]
     },
     "execution_count": 149,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "#first lets remove the day because we dont need it\n",
    "sputnik_rel[\"date\"] = sputnik_rel[\"date\"].astype(str).str[:-2].astype(np.int64)\n",
    "\n",
    "sputnik_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
    "#extract last two digits to change months structure\n",
    "\n",
    "#first change to charachter\n",
    "sputnik_rel['date1'] = sputnik_rel['date'].apply(str)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>date1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201601</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "      <td>201601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201502</td>\n",
       "      <td>Experte: Rund 1,3 Millionen wehrpflichtige Ukr...</td>\n",
       "      <td>201502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201811</td>\n",
       "      <td>So präzisiert Nahles ihre Idee für Hartz IV-Al...</td>\n",
       "      <td>201811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201811</td>\n",
       "      <td>Trump beschränkt drastisch Asylgewährung für i...</td>\n",
       "      <td>201811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201603</td>\n",
       "      <td>Österreich drängt auf deutsches Flüchtlingskon...</td>\n",
       "      <td>201603</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3306</th>\n",
       "      <td>201802</td>\n",
       "      <td>EU gibt weitere Milliarden Euro für Bekämpfung...</td>\n",
       "      <td>201802</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3307</th>\n",
       "      <td>201805</td>\n",
       "      <td>Angst vor Mega-Unruhen: Israel verlegt Tausend...</td>\n",
       "      <td>201805</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3308</th>\n",
       "      <td>201407</td>\n",
       "      <td>140 Migranten in Griechenland gerettet © Flick...</td>\n",
       "      <td>201407</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3309</th>\n",
       "      <td>201512</td>\n",
       "      <td>Österreich zäunt sich von Migranten ab - VIDEO...</td>\n",
       "      <td>201512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3310</th>\n",
       "      <td>201408</td>\n",
       "      <td>84 Flüchtlinge erreichen mit Booten spanische ...</td>\n",
       "      <td>201408</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3311 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        date                                            content   date1\n",
       "0     201601  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...  201601\n",
       "1     201502  Experte: Rund 1,3 Millionen wehrpflichtige Ukr...  201502\n",
       "2     201811  So präzisiert Nahles ihre Idee für Hartz IV-Al...  201811\n",
       "3     201811  Trump beschränkt drastisch Asylgewährung für i...  201811\n",
       "4     201603  Österreich drängt auf deutsches Flüchtlingskon...  201603\n",
       "...      ...                                                ...     ...\n",
       "3306  201802  EU gibt weitere Milliarden Euro für Bekämpfung...  201802\n",
       "3307  201805  Angst vor Mega-Unruhen: Israel verlegt Tausend...  201805\n",
       "3308  201407  140 Migranten in Griechenland gerettet © Flick...  201407\n",
       "3309  201512  Österreich zäunt sich von Migranten ab - VIDEO...  201512\n",
       "3310  201408  84 Flüchtlinge erreichen mit Booten spanische ...  201408\n",
       "\n",
       "[3311 rows x 3 columns]"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sputnik_rel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>date1</th>\n",
       "      <th>month</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201601</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "      <td>201601</td>\n",
       "      <td>01</td>\n",
       "      <td>2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201502</td>\n",
       "      <td>Experte: Rund 1,3 Millionen wehrpflichtige Ukr...</td>\n",
       "      <td>201502</td>\n",
       "      <td>02</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201811</td>\n",
       "      <td>So präzisiert Nahles ihre Idee für Hartz IV-Al...</td>\n",
       "      <td>201811</td>\n",
       "      <td>11</td>\n",
       "      <td>2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201811</td>\n",
       "      <td>Trump beschränkt drastisch Asylgewährung für i...</td>\n",
       "      <td>201811</td>\n",
       "      <td>11</td>\n",
       "      <td>2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201603</td>\n",
       "      <td>Österreich drängt auf deutsches Flüchtlingskon...</td>\n",
       "      <td>201603</td>\n",
       "      <td>03</td>\n",
       "      <td>2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3306</th>\n",
       "      <td>201802</td>\n",
       "      <td>EU gibt weitere Milliarden Euro für Bekämpfung...</td>\n",
       "      <td>201802</td>\n",
       "      <td>02</td>\n",
       "      <td>2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3307</th>\n",
       "      <td>201805</td>\n",
       "      <td>Angst vor Mega-Unruhen: Israel verlegt Tausend...</td>\n",
       "      <td>201805</td>\n",
       "      <td>05</td>\n",
       "      <td>2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3308</th>\n",
       "      <td>201407</td>\n",
       "      <td>140 Migranten in Griechenland gerettet © Flick...</td>\n",
       "      <td>201407</td>\n",
       "      <td>07</td>\n",
       "      <td>2014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3309</th>\n",
       "      <td>201512</td>\n",
       "      <td>Österreich zäunt sich von Migranten ab - VIDEO...</td>\n",
       "      <td>201512</td>\n",
       "      <td>12</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3310</th>\n",
       "      <td>201408</td>\n",
       "      <td>84 Flüchtlinge erreichen mit Booten spanische ...</td>\n",
       "      <td>201408</td>\n",
       "      <td>08</td>\n",
       "      <td>2014</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3311 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        date                                            content   date1 month  \\\n",
       "0     201601  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...  201601    01   \n",
       "1     201502  Experte: Rund 1,3 Millionen wehrpflichtige Ukr...  201502    02   \n",
       "2     201811  So präzisiert Nahles ihre Idee für Hartz IV-Al...  201811    11   \n",
       "3     201811  Trump beschränkt drastisch Asylgewährung für i...  201811    11   \n",
       "4     201603  Österreich drängt auf deutsches Flüchtlingskon...  201603    03   \n",
       "...      ...                                                ...     ...   ...   \n",
       "3306  201802  EU gibt weitere Milliarden Euro für Bekämpfung...  201802    02   \n",
       "3307  201805  Angst vor Mega-Unruhen: Israel verlegt Tausend...  201805    05   \n",
       "3308  201407  140 Migranten in Griechenland gerettet © Flick...  201407    07   \n",
       "3309  201512  Österreich zäunt sich von Migranten ab - VIDEO...  201512    12   \n",
       "3310  201408  84 Flüchtlinge erreichen mit Booten spanische ...  201408    08   \n",
       "\n",
       "      year  \n",
       "0     2016  \n",
       "1     2015  \n",
       "2     2018  \n",
       "3     2018  \n",
       "4     2016  \n",
       "...    ...  \n",
       "3306  2018  \n",
       "3307  2018  \n",
       "3308  2014  \n",
       "3309  2015  \n",
       "3310  2014  \n",
       "\n",
       "[3311 rows x 5 columns]"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#new column with months and years\n",
    "sputnik_rel['month'] = sputnik_rel['date1'].str[4:6]\n",
    "sputnik_rel['year'] = sputnik_rel['date1'].str[0:4]\n",
    "\n",
    "sputnik_rel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'2010', '2012', '2013', '2014', '2015', '2016', '2017', '2018'}"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "year_list = sputnik_rel['year'].tolist()\n",
    "year_set=set(year_list)\n",
    "year_set #checking if the year is correct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [],
   "source": [
    "#month data variable\n",
    "sputnik_rel[\"my\"]=sputnik_rel[\"month\"]+sputnik_rel[\"year\"]\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>date1</th>\n",
       "      <th>month</th>\n",
       "      <th>year</th>\n",
       "      <th>my</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201601</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "      <td>201601</td>\n",
       "      <td>01</td>\n",
       "      <td>2016</td>\n",
       "      <td>012016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201502</td>\n",
       "      <td>Experte: Rund 1,3 Millionen wehrpflichtige Ukr...</td>\n",
       "      <td>201502</td>\n",
       "      <td>02</td>\n",
       "      <td>2015</td>\n",
       "      <td>022015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201811</td>\n",
       "      <td>So präzisiert Nahles ihre Idee für Hartz IV-Al...</td>\n",
       "      <td>201811</td>\n",
       "      <td>11</td>\n",
       "      <td>2018</td>\n",
       "      <td>112018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201811</td>\n",
       "      <td>Trump beschränkt drastisch Asylgewährung für i...</td>\n",
       "      <td>201811</td>\n",
       "      <td>11</td>\n",
       "      <td>2018</td>\n",
       "      <td>112018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201603</td>\n",
       "      <td>Österreich drängt auf deutsches Flüchtlingskon...</td>\n",
       "      <td>201603</td>\n",
       "      <td>03</td>\n",
       "      <td>2016</td>\n",
       "      <td>032016</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content   date1 month  \\\n",
       "0  201601  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...  201601    01   \n",
       "1  201502  Experte: Rund 1,3 Millionen wehrpflichtige Ukr...  201502    02   \n",
       "2  201811  So präzisiert Nahles ihre Idee für Hartz IV-Al...  201811    11   \n",
       "3  201811  Trump beschränkt drastisch Asylgewährung für i...  201811    11   \n",
       "4  201603  Österreich drängt auf deutsches Flüchtlingskon...  201603    03   \n",
       "\n",
       "   year      my  \n",
       "0  2016  012016  \n",
       "1  2015  022015  \n",
       "2  2018  112018  \n",
       "3  2018  112018  \n",
       "4  2016  032016  "
      ]
     },
     "execution_count": 156,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sputnik_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>date1</th>\n",
       "      <th>month</th>\n",
       "      <th>year</th>\n",
       "      <th>my</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201601</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "      <td>201601</td>\n",
       "      <td>1</td>\n",
       "      <td>2016</td>\n",
       "      <td>012016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201502</td>\n",
       "      <td>Experte: Rund 1,3 Millionen wehrpflichtige Ukr...</td>\n",
       "      <td>201502</td>\n",
       "      <td>2</td>\n",
       "      <td>2015</td>\n",
       "      <td>022015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201811</td>\n",
       "      <td>So präzisiert Nahles ihre Idee für Hartz IV-Al...</td>\n",
       "      <td>201811</td>\n",
       "      <td>11</td>\n",
       "      <td>2018</td>\n",
       "      <td>112018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201811</td>\n",
       "      <td>Trump beschränkt drastisch Asylgewährung für i...</td>\n",
       "      <td>201811</td>\n",
       "      <td>11</td>\n",
       "      <td>2018</td>\n",
       "      <td>112018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201603</td>\n",
       "      <td>Österreich drängt auf deutsches Flüchtlingskon...</td>\n",
       "      <td>201603</td>\n",
       "      <td>3</td>\n",
       "      <td>2016</td>\n",
       "      <td>032016</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content   date1  month  \\\n",
       "0  201601  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...  201601      1   \n",
       "1  201502  Experte: Rund 1,3 Millionen wehrpflichtige Ukr...  201502      2   \n",
       "2  201811  So präzisiert Nahles ihre Idee für Hartz IV-Al...  201811     11   \n",
       "3  201811  Trump beschränkt drastisch Asylgewährung für i...  201811     11   \n",
       "4  201603  Österreich drängt auf deutsches Flüchtlingskon...  201603      3   \n",
       "\n",
       "   year      my  \n",
       "0  2016  012016  \n",
       "1  2015  022015  \n",
       "2  2018  112018  \n",
       "3  2018  112018  \n",
       "4  2016  032016  "
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#change month interger to month name\n",
    "import calendar\n",
    "\n",
    "#revert back to interger\n",
    "sputnik_rel['month'] = sputnik_rel['month'].apply(int)\n",
    "\n",
    "sputnik_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}"
      ]
     },
     "execution_count": 158,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "month_list = sputnik_rel['month'].tolist()\n",
    "month_set=set(month_list)\n",
    "month_set #checking if the month is correct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [],
   "source": [
    "sputnik_rel['month'] = sputnik_rel['month'].apply(lambda x: calendar.month_name[x])\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>date1</th>\n",
       "      <th>month</th>\n",
       "      <th>year</th>\n",
       "      <th>my</th>\n",
       "      <th>month-year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201601</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "      <td>201601</td>\n",
       "      <td>January</td>\n",
       "      <td>2016</td>\n",
       "      <td>012016</td>\n",
       "      <td>January 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201502</td>\n",
       "      <td>Experte: Rund 1,3 Millionen wehrpflichtige Ukr...</td>\n",
       "      <td>201502</td>\n",
       "      <td>February</td>\n",
       "      <td>2015</td>\n",
       "      <td>022015</td>\n",
       "      <td>February 2015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201811</td>\n",
       "      <td>So präzisiert Nahles ihre Idee für Hartz IV-Al...</td>\n",
       "      <td>201811</td>\n",
       "      <td>November</td>\n",
       "      <td>2018</td>\n",
       "      <td>112018</td>\n",
       "      <td>November 2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201811</td>\n",
       "      <td>Trump beschränkt drastisch Asylgewährung für i...</td>\n",
       "      <td>201811</td>\n",
       "      <td>November</td>\n",
       "      <td>2018</td>\n",
       "      <td>112018</td>\n",
       "      <td>November 2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201603</td>\n",
       "      <td>Österreich drängt auf deutsches Flüchtlingskon...</td>\n",
       "      <td>201603</td>\n",
       "      <td>March</td>\n",
       "      <td>2016</td>\n",
       "      <td>032016</td>\n",
       "      <td>March 2016</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content   date1  \\\n",
       "0  201601  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...  201601   \n",
       "1  201502  Experte: Rund 1,3 Millionen wehrpflichtige Ukr...  201502   \n",
       "2  201811  So präzisiert Nahles ihre Idee für Hartz IV-Al...  201811   \n",
       "3  201811  Trump beschränkt drastisch Asylgewährung für i...  201811   \n",
       "4  201603  Österreich drängt auf deutsches Flüchtlingskon...  201603   \n",
       "\n",
       "      month  year      my     month-year  \n",
       "0   January  2016  012016   January 2016  \n",
       "1  February  2015  022015  February 2015  \n",
       "2  November  2018  112018  November 2018  \n",
       "3  November  2018  112018  November 2018  \n",
       "4     March  2016  032016     March 2016  "
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sputnik_rel2=sputnik_rel\n",
    "\n",
    "#have a consistent date variable for all datasets\n",
    "sputnik_rel2[\"month-year\"] = sputnik_rel2[\"month\"] +[\" \"]+ sputnik_rel2[\"year\"] \n",
    "\n",
    "sputnik_rel2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012013</td>\n",
       "      <td>Migrationsstrategie: Verantwortung und gesetzl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012014</td>\n",
       "      <td>Großbritannien will EU-Freizügigkeit verhinder...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012015</td>\n",
       "      <td>Migrationsexperte: Zuwanderer tragen zur Finan...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012016</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012017</td>\n",
       "      <td>Kein Wort der Kritik zu Trumps Politik! – Ital...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>122014</td>\n",
       "      <td>Uno dankt Russland für Unterstützung ukrainisc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>122015</td>\n",
       "      <td>Dänemark will die Flüchtlingskonvention ändern...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>122016</td>\n",
       "      <td>Flüchtling drosselte Studentin fast zu Tode – ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>122017</td>\n",
       "      <td>Städte schlagen Alarm: Harte Linie gegen krimi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>122018</td>\n",
       "      <td>Gibt es ein Menschenrecht auf Migration? {    ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>74 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        my                                            content\n",
       "0   012013  Migrationsstrategie: Verantwortung und gesetzl...\n",
       "1   012014  Großbritannien will EU-Freizügigkeit verhinder...\n",
       "2   012015  Migrationsexperte: Zuwanderer tragen zur Finan...\n",
       "3   012016  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...\n",
       "4   012017  Kein Wort der Kritik zu Trumps Politik! – Ital...\n",
       "..     ...                                                ...\n",
       "69  122014  Uno dankt Russland für Unterstützung ukrainisc...\n",
       "70  122015  Dänemark will die Flüchtlingskonvention ändern...\n",
       "71  122016  Flüchtling drosselte Studentin fast zu Tode – ...\n",
       "72  122017  Städte schlagen Alarm: Harte Linie gegen krimi...\n",
       "73  122018  Gibt es ein Menschenrecht auf Migration? {    ...\n",
       "\n",
       "[74 rows x 2 columns]"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sputnik_rel2=sputnik_rel2.groupby(['my'])['content'].apply(lambda x: ','.join(x)).reset_index()\n",
    "sputnik_rel2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [],
   "source": [
    "sputnik_rel2.to_csv('out.csv')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "#nlp\n",
    "sputnik_rel2[\"nlpprocessed\"]=sputnik_rel2['content'].apply(nlp_pipeline)\n",
    "\n",
    "\n",
    "#convert nlpprocessed column to string\n",
    "sputnik_rel2['liststring'] = [','.join(map(str, l)) for l in sputnik_rel2['nlpprocessed']]\n",
    "sputnik_rel2['liststring'] = (sputnik_rel2['liststring'].replace(',',' ', regex=True))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "sputnik_rel_final=sputnik_rel2\n",
    "sputnik_rel_final['positive words'] = sputnik_rel_final['liststring'].str.count('|'.join(positive_list1))\n",
    "sputnik_rel_final['negative words'] = sputnik_rel_final['liststring'].str.count('|'.join(negative_list1))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012013</td>\n",
       "      <td>Migrationsstrategie: Verantwortung und gesetzl...</td>\n",
       "      <td>[migrationsstrategie, verantwortung, gesetzlic...</td>\n",
       "      <td>migrationsstrategie verantwortung gesetzliche ...</td>\n",
       "      <td>43</td>\n",
       "      <td>24</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012014</td>\n",
       "      <td>Großbritannien will EU-Freizügigkeit verhinder...</td>\n",
       "      <td>[großbritannien, verhindern, foto, epa, stimme...</td>\n",
       "      <td>großbritannien verhindern foto epa stimme russ...</td>\n",
       "      <td>61</td>\n",
       "      <td>36</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012015</td>\n",
       "      <td>Migrationsexperte: Zuwanderer tragen zur Finan...</td>\n",
       "      <td>[migrationsexperte, zuwanderer, tragen, finanz...</td>\n",
       "      <td>migrationsexperte zuwanderer tragen finanzieru...</td>\n",
       "      <td>22</td>\n",
       "      <td>2</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012016</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "      <td>[lindner, merkels, flüchtlingspolitik, führt, ...</td>\n",
       "      <td>lindner merkels flüchtlingspolitik führt europ...</td>\n",
       "      <td>8452</td>\n",
       "      <td>7215</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012017</td>\n",
       "      <td>Kein Wort der Kritik zu Trumps Politik! – Ital...</td>\n",
       "      <td>[wort, kritik, trumps, politik, italienischer,...</td>\n",
       "      <td>wort kritik trumps politik italienischer außen...</td>\n",
       "      <td>1454</td>\n",
       "      <td>1188</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content  \\\n",
       "0  012013  Migrationsstrategie: Verantwortung und gesetzl...   \n",
       "1  012014  Großbritannien will EU-Freizügigkeit verhinder...   \n",
       "2  012015  Migrationsexperte: Zuwanderer tragen zur Finan...   \n",
       "3  012016  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...   \n",
       "4  012017  Kein Wort der Kritik zu Trumps Politik! – Ital...   \n",
       "\n",
       "                                        nlpprocessed  \\\n",
       "0  [migrationsstrategie, verantwortung, gesetzlic...   \n",
       "1  [großbritannien, verhindern, foto, epa, stimme...   \n",
       "2  [migrationsexperte, zuwanderer, tragen, finanz...   \n",
       "3  [lindner, merkels, flüchtlingspolitik, führt, ...   \n",
       "4  [wort, kritik, trumps, politik, italienischer,...   \n",
       "\n",
       "                                          liststring  positive words  \\\n",
       "0  migrationsstrategie verantwortung gesetzliche ...              43   \n",
       "1  großbritannien verhindern foto epa stimme russ...              61   \n",
       "2  migrationsexperte zuwanderer tragen finanzieru...              22   \n",
       "3  lindner merkels flüchtlingspolitik führt europ...            8452   \n",
       "4  wort kritik trumps politik italienischer außen...            1454   \n",
       "\n",
       "   negative words   overall  \n",
       "0              24  positive  \n",
       "1              36  positive  \n",
       "2               2  positive  \n",
       "3            7215  positive  \n",
       "4            1188  positive  "
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conditions = [\n",
    "(sputnik_rel_final['positive words'] > sputnik_rel_final['negative words']),\n",
    "(sputnik_rel_final['negative words'] > sputnik_rel_final['positive words']),\n",
    "(sputnik_rel_final['negative words'] == sputnik_rel_final['positive words'])\n",
    "]\n",
    "\n",
    "choices = [\n",
    "'positive',\n",
    "'negative',\n",
    "'neutral'\n",
    "]\n",
    "\n",
    "sputnik_rel_final['overall'] = np.select(conditions, choices, default = '')\n",
    "\n",
    "sputnik_rel_final.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>month_year</th>\n",
       "      <th>original_title_text</th>\n",
       "      <th>text_procssed_text</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall_sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012013</td>\n",
       "      <td>Migrationsstrategie: Verantwortung und gesetzl...</td>\n",
       "      <td>migrationsstrategie verantwortung gesetzliche ...</td>\n",
       "      <td>43</td>\n",
       "      <td>24</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012014</td>\n",
       "      <td>Großbritannien will EU-Freizügigkeit verhinder...</td>\n",
       "      <td>großbritannien verhindern foto epa stimme russ...</td>\n",
       "      <td>61</td>\n",
       "      <td>36</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012015</td>\n",
       "      <td>Migrationsexperte: Zuwanderer tragen zur Finan...</td>\n",
       "      <td>migrationsexperte zuwanderer tragen finanzieru...</td>\n",
       "      <td>22</td>\n",
       "      <td>2</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012016</td>\n",
       "      <td>FDP-Chef Lindner: Merkels Flüchtlingspolitik f...</td>\n",
       "      <td>lindner merkels flüchtlingspolitik führt europ...</td>\n",
       "      <td>8452</td>\n",
       "      <td>7215</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>012017</td>\n",
       "      <td>Kein Wort der Kritik zu Trumps Politik! – Ital...</td>\n",
       "      <td>wort kritik trumps politik italienischer außen...</td>\n",
       "      <td>1454</td>\n",
       "      <td>1188</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  month_year                                original_title_text  \\\n",
       "0     012013  Migrationsstrategie: Verantwortung und gesetzl...   \n",
       "1     012014  Großbritannien will EU-Freizügigkeit verhinder...   \n",
       "2     012015  Migrationsexperte: Zuwanderer tragen zur Finan...   \n",
       "3     012016  FDP-Chef Lindner: Merkels Flüchtlingspolitik f...   \n",
       "4     012017  Kein Wort der Kritik zu Trumps Politik! – Ital...   \n",
       "\n",
       "                                  text_procssed_text  positive words  \\\n",
       "0  migrationsstrategie verantwortung gesetzliche ...              43   \n",
       "1  großbritannien verhindern foto epa stimme russ...              61   \n",
       "2  migrationsexperte zuwanderer tragen finanzieru...              22   \n",
       "3  lindner merkels flüchtlingspolitik führt europ...            8452   \n",
       "4  wort kritik trumps politik italienischer außen...            1454   \n",
       "\n",
       "   negative words overall_sentiment  \n",
       "0              24          positive  \n",
       "1              36          positive  \n",
       "2               2          positive  \n",
       "3            7215          positive  \n",
       "4            1188          positive  "
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#clean data\n",
    "del sputnik_rel_final['nlpprocessed']\n",
    "sputnik_rel_final = sputnik_rel_final.rename(columns={'content': 'original_title_text', 'liststring': 'text_procssed_text',\"my\":\"month_year\",\"overall\":\"overall_sentiment\"})\n",
    "sputnik_rel_final.to_csv(\"/Users/ashrakatelshehawy/Downloads/sputnik_sentiment.csv\")\n",
    "sputnik_rel_final.head()\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## RT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "rt = pd.read_csv(\"/Users/ashrakatelshehawy/rt_relevant-migrant-news.csv\", encoding='utf-8', delimiter='\\t',header=None,  error_bad_lines=False)\n",
    "\n",
    "#remove unnesseary rt columns\n",
    "rt_rel = rt.drop(rt.columns[[0,1,5,6,7,8]], axis=1) # remove first column that contains url\n",
    "\n",
    "#rename columns\n",
    "rt_rel.columns = ['date', 'title',\"content\"] \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>title</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>324</th>\n",
       "      <td>20150903</td>\n",
       "      <td>Versuch Nummer Zwei: UN-Sicherheitsrat soll EU...</td>\n",
       "      <td>InternationalChina begeht Ende des Zweiten ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>590</th>\n",
       "      <td>20180531</td>\n",
       "      <td>Polizei schießt auf Kleinbus mit Flüchtlingen ...</td>\n",
       "      <td>vkontaktefacebookyoutubetwittergoogleinstagram...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>503</th>\n",
       "      <td>20180918</td>\n",
       "      <td>Tausende demonstrieren in Frankfurt gegen Flüc...</td>\n",
       "      <td>{                    \"vars\": {...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>20150910</td>\n",
       "      <td>Fluchtziel Deutschland: Kritik an deutscher As...</td>\n",
       "      <td>InternationalUSA: Trotz Offensive der Israe...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>244</th>\n",
       "      <td>20151023</td>\n",
       "      <td>Friedensnobelpreisträger EU? Schreckliche Szen...</td>\n",
       "      <td>InternationalAssad zu Besuch in Russland – ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date                                              title  \\\n",
       "324  20150903  Versuch Nummer Zwei: UN-Sicherheitsrat soll EU...   \n",
       "590  20180531  Polizei schießt auf Kleinbus mit Flüchtlingen ...   \n",
       "503  20180918  Tausende demonstrieren in Frankfurt gegen Flüc...   \n",
       "90   20150910  Fluchtziel Deutschland: Kritik an deutscher As...   \n",
       "244  20151023  Friedensnobelpreisträger EU? Schreckliche Szen...   \n",
       "\n",
       "                                               content  \n",
       "324     InternationalChina begeht Ende des Zweiten ...  \n",
       "590  vkontaktefacebookyoutubetwittergoogleinstagram...  \n",
       "503                  {                    \"vars\": {...  \n",
       "90      InternationalUSA: Trotz Offensive der Israe...  \n",
       "244     InternationalAssad zu Besuch in Russland – ...  "
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#sample of 25\n",
    "rt_rel_sample=rt_rel.sample(25)\n",
    "rt_rel_sample.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>month</th>\n",
       "      <th>year</th>\n",
       "      <th>my</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201809</td>\n",
       "      <td>Tschechiens Regierungschef Andrej Babiš forder...</td>\n",
       "      <td>9</td>\n",
       "      <td>2018</td>\n",
       "      <td>092018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201702</td>\n",
       "      <td>Frankreich: Flüchtlinge kehren nach Calais zur...</td>\n",
       "      <td>2</td>\n",
       "      <td>2017</td>\n",
       "      <td>022017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201612</td>\n",
       "      <td>Spanien: 67 Migranten aus Haftzentrum für Flüc...</td>\n",
       "      <td>12</td>\n",
       "      <td>2016</td>\n",
       "      <td>122016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201708</td>\n",
       "      <td>UNO lobt iranische Flüchtlingspolitik: Million...</td>\n",
       "      <td>8</td>\n",
       "      <td>2017</td>\n",
       "      <td>082017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201702</td>\n",
       "      <td>Französischer Bauer zu 3.000 Euro Strafe wegen...</td>\n",
       "      <td>2</td>\n",
       "      <td>2017</td>\n",
       "      <td>022017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content  month  year  \\\n",
       "0  201809  Tschechiens Regierungschef Andrej Babiš forder...      9  2018   \n",
       "1  201702  Frankreich: Flüchtlinge kehren nach Calais zur...      2  2017   \n",
       "2  201612  Spanien: 67 Migranten aus Haftzentrum für Flüc...     12  2016   \n",
       "3  201708  UNO lobt iranische Flüchtlingspolitik: Million...      8  2017   \n",
       "4  201702  Französischer Bauer zu 3.000 Euro Strafe wegen...      2  2017   \n",
       "\n",
       "       my  \n",
       "0  092018  \n",
       "1  022017  \n",
       "2  122016  \n",
       "3  082017  \n",
       "4  022017  "
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#merge content and title together in one variable\n",
    "rt_rel[\"content\"]=rt_rel[\"title\"]+[\" \"]+rt_rel[\"content\"]\n",
    "\n",
    "# remove first column that contains url\n",
    "rt_rel = rt_rel.drop(rt_rel.columns[[1]], axis=1) \n",
    "\n",
    "\n",
    "#first lets remove the day\n",
    "rt_rel[\"date\"] = rt_rel[\"date\"].astype(str).str[:-2].astype(np.int64)\n",
    "\n",
    "\n",
    "#first change to charachter\n",
    "rt_rel['date'] = rt_rel['date'].apply(str)\n",
    "\n",
    "#new column with month and year\n",
    "rt_rel['month'] = rt_rel['date'].str[4:6]\n",
    "rt_rel['year'] = rt_rel['date'].str[0:4]\n",
    "\n",
    "rt_rel[\"my\"]=rt_rel[\"month\"]+rt_rel[\"year\"]\n",
    "\n",
    "#change month interger to month name\n",
    "import calendar\n",
    "\n",
    "#revert back to interger\n",
    "rt_rel['month'] = rt_rel['month'].apply(int)\n",
    "rt_rel.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>month</th>\n",
       "      <th>year</th>\n",
       "      <th>my</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201809</td>\n",
       "      <td>Tschechiens Regierungschef Andrej Babiš forder...</td>\n",
       "      <td>September</td>\n",
       "      <td>2018</td>\n",
       "      <td>092018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201702</td>\n",
       "      <td>Frankreich: Flüchtlinge kehren nach Calais zur...</td>\n",
       "      <td>February</td>\n",
       "      <td>2017</td>\n",
       "      <td>022017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201612</td>\n",
       "      <td>Spanien: 67 Migranten aus Haftzentrum für Flüc...</td>\n",
       "      <td>December</td>\n",
       "      <td>2016</td>\n",
       "      <td>122016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201708</td>\n",
       "      <td>UNO lobt iranische Flüchtlingspolitik: Million...</td>\n",
       "      <td>August</td>\n",
       "      <td>2017</td>\n",
       "      <td>082017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201702</td>\n",
       "      <td>Französischer Bauer zu 3.000 Euro Strafe wegen...</td>\n",
       "      <td>February</td>\n",
       "      <td>2017</td>\n",
       "      <td>022017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content      month  year  \\\n",
       "0  201809  Tschechiens Regierungschef Andrej Babiš forder...  September  2018   \n",
       "1  201702  Frankreich: Flüchtlinge kehren nach Calais zur...   February  2017   \n",
       "2  201612  Spanien: 67 Migranten aus Haftzentrum für Flüc...   December  2016   \n",
       "3  201708  UNO lobt iranische Flüchtlingspolitik: Million...     August  2017   \n",
       "4  201702  Französischer Bauer zu 3.000 Euro Strafe wegen...   February  2017   \n",
       "\n",
       "       my  \n",
       "0  092018  \n",
       "1  022017  \n",
       "2  122016  \n",
       "3  082017  \n",
       "4  022017  "
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Turn the integer month number into its month name using the stdlib calendar table\n",
    "# (calendar.month_name follows the current locale).\n",
    "rt_rel['month'] = rt_rel['month'].map(lambda month_number: calendar.month_name[month_number])\n",
    "rt_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>content</th>\n",
       "      <th>month</th>\n",
       "      <th>year</th>\n",
       "      <th>my</th>\n",
       "      <th>month-year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>201809</td>\n",
       "      <td>Tschechiens Regierungschef Andrej Babiš forder...</td>\n",
       "      <td>September</td>\n",
       "      <td>2018</td>\n",
       "      <td>092018</td>\n",
       "      <td>September 2018</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>201702</td>\n",
       "      <td>Frankreich: Flüchtlinge kehren nach Calais zur...</td>\n",
       "      <td>February</td>\n",
       "      <td>2017</td>\n",
       "      <td>022017</td>\n",
       "      <td>February 2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>201612</td>\n",
       "      <td>Spanien: 67 Migranten aus Haftzentrum für Flüc...</td>\n",
       "      <td>December</td>\n",
       "      <td>2016</td>\n",
       "      <td>122016</td>\n",
       "      <td>December 2016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>201708</td>\n",
       "      <td>UNO lobt iranische Flüchtlingspolitik: Million...</td>\n",
       "      <td>August</td>\n",
       "      <td>2017</td>\n",
       "      <td>082017</td>\n",
       "      <td>August 2017</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>201702</td>\n",
       "      <td>Französischer Bauer zu 3.000 Euro Strafe wegen...</td>\n",
       "      <td>February</td>\n",
       "      <td>2017</td>\n",
       "      <td>022017</td>\n",
       "      <td>February 2017</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     date                                            content      month  year  \\\n",
       "0  201809  Tschechiens Regierungschef Andrej Babiš forder...  September  2018   \n",
       "1  201702  Frankreich: Flüchtlinge kehren nach Calais zur...   February  2017   \n",
       "2  201612  Spanien: 67 Migranten aus Haftzentrum für Flüc...   December  2016   \n",
       "3  201708  UNO lobt iranische Flüchtlingspolitik: Million...     August  2017   \n",
       "4  201702  Französischer Bauer zu 3.000 Euro Strafe wegen...   February  2017   \n",
       "\n",
       "       my      month-year  \n",
       "0  092018  September 2018  \n",
       "1  022017   February 2017  \n",
       "2  122016   December 2016  \n",
       "3  082017     August 2017  \n",
       "4  022017   February 2017  "
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "# Build a readable 'Month Year' label (e.g. 'September 2018') so that all datasets share a\n",
    "# consistent date variable. The separator is a scalar string rather than a one-element list,\n",
    "# which pandas would treat as an array operand.\n",
    "rt_rel['month-year'] = rt_rel['month'] + ' ' + rt_rel['year']\n",
    "rt_rel.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nach Stopp von Hilfszahlungen – Gaza-Bewohner ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Tschetschenischer Präsident: \"Der Westen schaf...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Madeleine Albright will aus Solidarität mit Fl...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Ungarn: Strafsteuer für auslandsgeförderte Flü...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>022015</td>\n",
       "      <td>Präventive Abschreckung? Österreich droht Koso...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content\n",
       "0  012015  Nach Stopp von Hilfszahlungen – Gaza-Bewohner ...\n",
       "1  012016  Tschetschenischer Präsident: \"Der Westen schaf...\n",
       "2  012017  Madeleine Albright will aus Solidarität mit Fl...\n",
       "3  012018  Ungarn: Strafsteuer für auslandsgeförderte Flü...\n",
       "4  022015  Präventive Abschreckung? Österreich droht Koso..."
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the article text and its 'my' (month + year) key. Selecting the columns by name\n",
    "# instead of dropping by position keeps the cell independent of column order and re-runnable.\n",
    "rt_rel = rt_rel[['content', 'my']]\n",
    "\n",
    "# Concatenate all article texts that share the same 'my' key into one comma-separated document.\n",
    "rt_rel3 = rt_rel.groupby('my')['content'].apply(','.join).reset_index()\n",
    "rt_rel3.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run nlp_pipeline (defined earlier in the notebook) on every monthly document.\n",
    "rt_rel3['nlpprocessed'] = rt_rel3['content'].apply(nlp_pipeline)\n",
    "\n",
    "# Flatten each processed item list into a single space-separated string; commas that occur\n",
    "# inside an item are turned into spaces as well.\n",
    "rt_rel3['liststring'] = [\n",
    "    ' '.join(str(token).replace(',', ' ') for token in tokens)\n",
    "    for tokens in rt_rel3['nlpprocessed']\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "\n",
    "def count_lexicon_hits(texts, lexicon):\n",
    "    '''Return, per string in `texts`, the number of non-overlapping matches of any `lexicon` entry.\n",
    "\n",
    "    texts:   pandas Series of strings.\n",
    "    lexicon: iterable of words; every entry is matched literally and case-sensitively.\n",
    "    '''\n",
    "    # Series.str.count interprets its pattern as a regular expression, so escape each entry to\n",
    "    # keep characters such as '.', '+' or '(' literal, and skip empty entries.\n",
    "    escaped_terms = [re.escape(term) for term in lexicon if term]\n",
    "    if not escaped_terms:\n",
    "        # An empty pattern matches at every position; with no lexicon there are no hits.\n",
    "        return pd.Series(0, index=texts.index)\n",
    "    return texts.str.count('|'.join(escaped_terms))\n",
    "\n",
    "\n",
    "# Work on a copy so the count columns added below do not also appear in rt_rel3.\n",
    "rt_rel_final = rt_rel3.copy()\n",
    "\n",
    "# NOTE(review): matching is substring-based (no word boundaries), so a lexicon entry is also\n",
    "# counted inside longer tokens - confirm whether whole-word matching is intended.\n",
    "rt_rel_final['positive words'] = count_lexicon_hits(rt_rel_final['liststring'], positive_list1)\n",
    "rt_rel_final['negative words'] = count_lexicon_hits(rt_rel_final['liststring'], negative_list1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>my</th>\n",
       "      <th>content</th>\n",
       "      <th>nlpprocessed</th>\n",
       "      <th>liststring</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nach Stopp von Hilfszahlungen – Gaza-Bewohner ...</td>\n",
       "      <td>[stopp, hilfszahlungen, greifen, schulz, unter...</td>\n",
       "      <td>stopp hilfszahlungen greifen schulz unterredun...</td>\n",
       "      <td>57</td>\n",
       "      <td>48</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Tschetschenischer Präsident: \"Der Westen schaf...</td>\n",
       "      <td>[tschetschenischer, präsident, westen, schafft...</td>\n",
       "      <td>tschetschenischer präsident westen schafft flü...</td>\n",
       "      <td>110</td>\n",
       "      <td>123</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Madeleine Albright will aus Solidarität mit Fl...</td>\n",
       "      <td>[madeleine, albright, solidarität, flüchtlinge...</td>\n",
       "      <td>madeleine albright solidarität flüchtlingen is...</td>\n",
       "      <td>89</td>\n",
       "      <td>88</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Ungarn: Strafsteuer für auslandsgeförderte Flü...</td>\n",
       "      <td>[ungarn, strafsteuer, auslandsgeförderte, flüc...</td>\n",
       "      <td>ungarn strafsteuer auslandsgeförderte flüchtli...</td>\n",
       "      <td>386</td>\n",
       "      <td>496</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>022015</td>\n",
       "      <td>Präventive Abschreckung? Österreich droht Koso...</td>\n",
       "      <td>[präventive, abschreckung, österreich, droht, ...</td>\n",
       "      <td>präventive abschreckung österreich droht harte...</td>\n",
       "      <td>189</td>\n",
       "      <td>229</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       my                                            content  \\\n",
       "0  012015  Nach Stopp von Hilfszahlungen – Gaza-Bewohner ...   \n",
       "1  012016  Tschetschenischer Präsident: \"Der Westen schaf...   \n",
       "2  012017  Madeleine Albright will aus Solidarität mit Fl...   \n",
       "3  012018  Ungarn: Strafsteuer für auslandsgeförderte Flü...   \n",
       "4  022015  Präventive Abschreckung? Österreich droht Koso...   \n",
       "\n",
       "                                        nlpprocessed  \\\n",
       "0  [stopp, hilfszahlungen, greifen, schulz, unter...   \n",
       "1  [tschetschenischer, präsident, westen, schafft...   \n",
       "2  [madeleine, albright, solidarität, flüchtlinge...   \n",
       "3  [ungarn, strafsteuer, auslandsgeförderte, flüc...   \n",
       "4  [präventive, abschreckung, österreich, droht, ...   \n",
       "\n",
       "                                          liststring  positive words  \\\n",
       "0  stopp hilfszahlungen greifen schulz unterredun...              57   \n",
       "1  tschetschenischer präsident westen schafft flü...             110   \n",
       "2  madeleine albright solidarität flüchtlingen is...              89   \n",
       "3  ungarn strafsteuer auslandsgeförderte flüchtli...             386   \n",
       "4  präventive abschreckung österreich droht harte...             189   \n",
       "\n",
       "   negative words   overall  \n",
       "0              48  positive  \n",
       "1             123  negative  \n",
       "2              88  positive  \n",
       "3             496  negative  \n",
       "4             229  negative  "
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Label each month by comparing its positive and negative lexicon hit counts.\n",
    "pos_counts = rt_rel_final['positive words']\n",
    "neg_counts = rt_rel_final['negative words']\n",
    "\n",
    "# label -> boolean mask; np.select assigns the label of the first mask that is True for a row.\n",
    "label_masks = {\n",
    "    'positive': pos_counts > neg_counts,\n",
    "    'negative': neg_counts > pos_counts,\n",
    "    'neutral': neg_counts == pos_counts,\n",
    "}\n",
    "\n",
    "# Rows that satisfy none of the masks fall back to the empty-string default.\n",
    "rt_rel_final['overall'] = np.select(\n",
    "    list(label_masks.values()), list(label_masks.keys()), default=''\n",
    ")\n",
    "\n",
    "rt_rel_final.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>month_year</th>\n",
       "      <th>original_title_text</th>\n",
       "      <th>text_procssed_text</th>\n",
       "      <th>positive words</th>\n",
       "      <th>negative words</th>\n",
       "      <th>overall_sentiment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>012015</td>\n",
       "      <td>Nach Stopp von Hilfszahlungen – Gaza-Bewohner ...</td>\n",
       "      <td>stopp hilfszahlungen greifen schulz unterredun...</td>\n",
       "      <td>57</td>\n",
       "      <td>48</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>012016</td>\n",
       "      <td>Tschetschenischer Präsident: \"Der Westen schaf...</td>\n",
       "      <td>tschetschenischer präsident westen schafft flü...</td>\n",
       "      <td>110</td>\n",
       "      <td>123</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>012017</td>\n",
       "      <td>Madeleine Albright will aus Solidarität mit Fl...</td>\n",
       "      <td>madeleine albright solidarität flüchtlingen is...</td>\n",
       "      <td>89</td>\n",
       "      <td>88</td>\n",
       "      <td>positive</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>012018</td>\n",
       "      <td>Ungarn: Strafsteuer für auslandsgeförderte Flü...</td>\n",
       "      <td>ungarn strafsteuer auslandsgeförderte flüchtli...</td>\n",
       "      <td>386</td>\n",
       "      <td>496</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>022015</td>\n",
       "      <td>Präventive Abschreckung? Österreich droht Koso...</td>\n",
       "      <td>präventive abschreckung österreich droht harte...</td>\n",
       "      <td>189</td>\n",
       "      <td>229</td>\n",
       "      <td>negative</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  month_year                                original_title_text  \\\n",
       "0     012015  Nach Stopp von Hilfszahlungen – Gaza-Bewohner ...   \n",
       "1     012016  Tschetschenischer Präsident: \"Der Westen schaf...   \n",
       "2     012017  Madeleine Albright will aus Solidarität mit Fl...   \n",
       "3     012018  Ungarn: Strafsteuer für auslandsgeförderte Flü...   \n",
       "4     022015  Präventive Abschreckung? Österreich droht Koso...   \n",
       "\n",
       "                                  text_procssed_text  positive words  \\\n",
       "0  stopp hilfszahlungen greifen schulz unterredun...              57   \n",
       "1  tschetschenischer präsident westen schafft flü...             110   \n",
       "2  madeleine albright solidarität flüchtlingen is...              89   \n",
       "3  ungarn strafsteuer auslandsgeförderte flüchtli...             386   \n",
       "4  präventive abschreckung österreich droht harte...             189   \n",
       "\n",
       "   negative words overall_sentiment  \n",
       "0              48          positive  \n",
       "1             123          negative  \n",
       "2              88          positive  \n",
       "3             496          negative  \n",
       "4             229          negative  "
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Clean data: drop the processed-token column and give the remaining columns their export names.\n",
    "# drop(..., errors='ignore') replaces `del` so re-running this cell does not raise a KeyError.\n",
    "# NOTE(review): 'text_procssed_text' is misspelled; kept unchanged in case other files read it.\n",
    "rt_rel_final = (\n",
    "    rt_rel_final\n",
    "    .drop(columns=['nlpprocessed'], errors='ignore')\n",
    "    .rename(columns={\n",
    "        'content': 'original_title_text',\n",
    "        'liststring': 'text_procssed_text',\n",
    "        'my': 'month_year',\n",
    "        'overall': 'overall_sentiment',\n",
    "    })\n",
    ")\n",
    "\n",
    "# Write the sentiment table to CSV (the row index is written too); adjust the path to your machine.\n",
    "rt_rel_final.to_csv('/Users/ashrakatelshehawy/Downloads/rt_sentiment.csv')\n",
    "rt_rel_final.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
